Passed
Push — master ( 63d321...dcdb1d )
by Lars
04:45
created

UTF8::str_isubstr_after_first_separator()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 26
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 5.9256

Importance

Changes 2
Bugs 0 Features 1
Metric Value
cc 5
eloc 14
nc 4
nop 3
dl 0
loc 26
ccs 10
cts 15
cp 0.6667
crap 5.9256
rs 9.4888
c 2
b 0
f 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
21
     * Bom => Byte-Length
22
     *
23
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
24
     *
25
     * @var array<string, int>
26
     */
27
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The bytes EF BB BF read as WINDOWS-1252 are the characters "ï»¿"; each of them
        // needs two bytes in UTF-8, hence 6 bytes. Written as escapes so the key cannot
        // get lost or altered by editors / tools that treat it as a real BOM.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two leading characters of the next key (and the two trailing
        // ones of the UTF-32 (LE) variant) stand in for the NUL bytes of the BOM - confirm
        // whether literal spaces or NUL characters are intended here.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array{upper: string[], lower: string[]}
131
     */
132
    private static $COMMON_CASE_FOLD = [
133
        'upper' => [
134
            'µ',
135
            'ſ',
136
            "\xCD\x85",
137
            'ς',
138
            'ẞ',
139
            "\xCF\x90",
140
            "\xCF\x91",
141
            "\xCF\x95",
142
            "\xCF\x96",
143
            "\xCF\xB0",
144
            "\xCF\xB1",
145
            "\xCF\xB5",
146
            "\xE1\xBA\x9B",
147
            "\xE1\xBE\xBE",
148
        ],
149
        'lower' => [
150
            'μ',
151
            's',
152
            'ι',
153
            'σ',
154
            'ß',
155
            'β',
156
            'θ',
157
            'φ',
158
            'π',
159
            'κ',
160
            'ρ',
161
            'ε',
162
            "\xE1\xB9\xA1",
163
            'ι',
164
        ],
165
    ];
166
167
    /**
168
     * @var array<string, mixed>
169
     */
170
    private static $SUPPORT = [];
171
172
    /**
173
     * @var array<string, string>|null
174
     */
175
    private static $BROKEN_UTF8_FIX;
176
177
    /**
178
     * @var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var array<int, string>|null
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<int at position 2 could not be parsed: Expected '>' at position 2, but found 'int'.
Loading history...
184
     */
185
    private static $INTL_TRANSLITERATOR_LIST;
186
187
    /**
188
     * @var array<string>|null
189
     */
190
    private static $ENCODINGS;
191
192
    /**
193
     * @var array<string, int>|null
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<string at position 2 could not be parsed: Expected '>' at position 2, but found 'string'.
Loading history...
194
     */
195
    private static $ORD;
196
197
    /**
198
     * @var array<string, string>|null
199
     */
200
    private static $EMOJI;
201
202
    /**
203
     * @var array<string>|null
204
     */
205
    private static $EMOJI_VALUES_CACHE;
206
207
    /**
208
     * @var array<string>|null
209
     */
210
    private static $EMOJI_KEYS_CACHE;
211
212
    /**
213
     * @var array<string>|null
214
     */
215
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @psalm-var array<int, string>|null
221
     */
222
    private static $CHR;
223
224
    /**
225
     * __construct()
226
     */
227 34
    public function __construct()
    {
        // Intentionally empty: the constructor body performs no work.
    }
230
231
    /**
232
     * Return the character at the specified position: $str[1] like functionality.
233
     *
234
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
235
     *
236
     * @param string $str      <p>A UTF-8 string.</p>
237
     * @param int    $pos      <p>The position of character to return.</p>
238
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
239
     *
240
     * @psalm-pure
241
     *
242
     * @return string
243
     *                <p>Single multi-byte character.</p>
244
     */
245 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing to look up in an empty string; negative positions are rejected here.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other charset through the class' own substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
257
258
    /**
259
     * Prepends UTF-8 BOM character to the string and returns the whole string.
260
     *
261
     * INFO: If BOM already existed there, the Input string is returned.
262
     *
263
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
264
     *
265
     * @param string $str <p>The input string.</p>
266
     *
267
     * @psalm-pure
268
     *
269
     * @return string
270
     *                <p>The output string that contains BOM.</p>
271
     */
272 2
    public static function add_bom_to_string(string $str): string
    {
        // A string that already starts with a BOM is handed back untouched.
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
280
281
    /**
282
     * Changes all keys in an array.
283
     *
284
     * @param array<string, mixed> $array    <p>The array to work on</p>
285
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
286
     *                                       or <strong>CASE_LOWER</strong> (default)</p>
287
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
288
     *
289
     * @psalm-pure
290
     *
291
     * @return string[]
292
     *                  <p>An array with its keys lower- or uppercased.</p>
293
     */
294 2
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Only CASE_UPPER selects upper-casing; CASE_LOWER and every unsupported
        // value fall back to lower-casing (the documented default).
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the values are only copied into the result, so a
        // by-reference loop variable is unnecessary and would have to be unset afterwards.
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            // Keys that become equal after the case change overwrite each other; the last one wins.
            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
320
     * Returns the substring between $start and $end, if found, or an empty
321
     * string. An optional offset may be supplied from which to begin the
322
     * search for the start string.
323
     *
324
     * @param string $str
325
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
326
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
327
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
328
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
329
     *
330
     * @psalm-pure
331
     *
332
     * @return string
333
     */
334 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // The literal 'UTF-8' uses the mbstring functions directly; any other
        // value is normalized first and handled by the class' own wrappers.
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Locate the start delimiter.
        $start_position = $use_mb
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($start_position === false) {
            return '';
        }

        // The wanted substring begins right behind the start delimiter.
        $start_length = $use_mb
            ? \mb_strlen($start)
            : self::strlen($start, $encoding);
        $substr_index = $start_position + (int) $start_length;

        // Locate the end delimiter, searching only behind the start delimiter.
        $end_position = $use_mb
            ? \mb_strpos($str, $end, $substr_index)
            : self::strpos($str, $end, $substr_index, $encoding);
        if ($end_position === false || $end_position === $substr_index) {
            // no end delimiter, or nothing between both delimiters
            return '';
        }

        $length = $end_position - $substr_index;

        if ($use_mb) {
            return (string) \mb_substr($str, $substr_index, $length);
        }

        return (string) self::substr($str, $substr_index, $length, $encoding);
    }
384
385
    /**
386
     * Convert binary into a string.
387
     *
388
     * INFO: opposite to UTF8::str_to_binary()
389
     *
390
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
391
     *
392
     * @param string $bin 1|0
393
     *
394
     * @psalm-pure
395
     *
396
     * @return string
397
     */
398 2
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // Keep only binary digits and drop leading zeros, so that an all-zero
        // input still results in an empty string.
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to whole bytes. Converting through one big number and
        // pack('H*') instead would shift the result for an odd number of hex
        // digits (e.g. "1010" => "\xA0" instead of "\x0A") and lose precision
        // for long inputs.
        $padding = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $padding) . $bits;

        // Decode byte by byte; "\chr" is the global single-byte function, not UTF8::chr().
        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
411
412
    /**
413
     * Returns the UTF-8 Byte Order Mark Character.
414
     *
415
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
416
     *
417
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
418
     *
419
     * @psalm-pure
420
     *
421
     * @return string
422
     *                <p>UTF-8 Byte Order Mark.</p>
423
     */
424 4
    public static function bom(): string
    {
        // The three bytes EF BB BF.
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
428
429
    /**
430
     * @alias of UTF8::chr_map()
431
     *
432
     * @param callable $callback
433
     * @param string   $str
434
     *
435
     * @psalm-pure
436
     *
437
     * @return string[]
438
     *
439
     * @see   UTF8::chr_map()
440
     */
441 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: all work is done by chr_map().
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
445
446
    /**
447
     * Returns the character at $index, with indexes starting at 0.
448
     *
449
     * @param string $str      <p>The input string.</p>
450
     * @param int    $index    <p>Position of the character.</p>
451
     * @param string $encoding [optional] <p>Default is UTF-8</p>
452
     *
453
     * @psalm-pure
454
     *
455
     * @return string
456
     *                <p>The character at $index.</p>
457
     */
458 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // No range check on $index here: it is passed on unchanged.
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
466
467
    /**
468
     * Returns an array consisting of the characters in the string.
469
     *
470
     * @param string $str <p>The input string.</p>
471
     *
472
     * @psalm-pure
473
     *
474
     * @return string[]
475
     *                  <p>An array of chars.</p>
476
     */
477 3
    public static function chars(string $str): array
    {
        // Delegates to str_split() with its default arguments.
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
482
483
    /**
484
     * This method will auto-detect your server environment for UTF-8 support.
485
     *
486
     * @return true|null
487
     *
488
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
489
     */
490 5
    public static function checkForSupport()
    {
        // The detection runs only once; later calls are a no-op that returns null.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch mbstring (and its regex functions) to UTF-8
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides "mb_internal_encoding()" as well, so set it there too
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
538
539
    /**
540
     * Generates a UTF-8 encoded character from the given code point.
541
     *
542
     * INFO: opposite to UTF8::ord()
543
     *
544
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
545
     *
546
     * @param int    $code_point <p>The code point for which to generate a character.</p>
547
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
548
     *
549
     * @psalm-pure
550
     *
551
     * @return string|null
552
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
553
     */
554 21
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * Results per "<code point>_<encoding>", kept for the lifetime of the process.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // zero and negative code points are treated as failure / empty input
        if ($code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point <= 0x7F) {
            //
            // single-byte characters: direct table lookup
            //
            // The upper bound is 0x7F, not 0x80: U+0080 already needs two bytes
            // in UTF-8 (C2 80). The lookup table is indexed by byte value (see
            // the fallback below), so a lookup of 0x80 would presumably yield a
            // lone continuation byte, which is not valid UTF-8.
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            //
            // fallback via "IntlChar"
            //

            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            //
            // fallback via vanilla php: assemble the UTF-8 byte sequence by hand
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            $code_point = (int) $code_point;
            if ($code_point <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // the character was built as UTF-8, convert it if another charset was requested
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
663
664
    /**
665
     * Applies callback to all characters of a string.
666
     *
667
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
668
     *
669
     * @param callable $callback <p>The callback function.</p>
670
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
671
     *
672
     * @psalm-pure
673
     *
674
     * @return string[]
675
     *                  <p>The outcome of the callback, as array.</p>
676
     */
677 2
    public static function chr_map($callback, string $str): array
    {
        // Split into single characters first, then run the callback on each of them.
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
684
685
    /**
686
     * Generates an array of byte length of each character of a Unicode string.
687
     *
688
     * 1 byte => U+0000  - U+007F
689
     * 2 byte => U+0080  - U+07FF
690
     * 3 byte => U+0800  - U+FFFF
691
     * 4 byte => U+10000 - U+10FFFF
692
     *
693
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
694
     *
695
     * @param string $str <p>The original unicode string.</p>
696
     *
697
     * @psalm-pure
698
     *
699
     * @return int[]
700
     *               <p>An array of byte lengths of each character.</p>
701
     */
702 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Pick the byte-counting function first: with mbstring function
        // overloading active, "mb_strlen()" with an 8-bit charset is used
        // instead of plain "strlen()".
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byte_length = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, self::str_split($str));
    }
720
721
    /**
722
     * Get a decimal code representation of a specific character.
723
     *
724
     * INFO: opposite to UTF8::decimal_to_chr()
725
     *
726
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
727
     *
728
     * @param string $char <p>The input character.</p>
729
     *
730
     * @psalm-pure
731
     *
732
     * @return int
733
     */
734 5
    public static function chr_to_decimal(string $char): int
    {
        // Preferred path: convert to UCS-4LE via iconv and unpack the first
        // 32-bit little-endian value ("V").
        if (self::$SUPPORT['iconv'] === true) {
            $ucs4 = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($ucs4 !== false) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $ucs4)[1];
            }
        }

        // Manual decoding, starting with the lead byte.
        $code = self::ord($char[0]);

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // The lead byte tells how long the sequence is; its marker bits are cleared.
        $length = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $code &= ~0xf0;
        }

        // Append the bits of every following byte.
        for ($offset = 1; $offset < $length; ++$offset) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
773
774
    /**
775
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
776
     *
777
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
778
     *
779
     * @param int|string $char   <p>The input character</p>
780
     * @param string     $prefix [optional]
781
     *
782
     * @psalm-pure
783
     *
784
     * @return string
785
     *                <p>The code point encoded as U+xxxx.</p>
786
     */
787 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The HTML entity "&#0;" is passed to ord() as an empty string.
        $input = $char === '&#0;' ? '' : (string) $char;
        $code_point = self::ord($input);

        return self::int_to_hex($code_point, $prefix);
    }
799
800
    /**
801
     * alias for "UTF8::chr_to_decimal()"
802
     *
803
     * @param string $chr
804
     *
805
     * @psalm-pure
806
     *
807
     * @return int
808
     *
809
     * @see        UTF8::chr_to_decimal()
810
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
811
     */
812 2
    public static function chr_to_int(string $chr): int
    {
        // Deprecated alias: everything is delegated to chr_to_decimal().
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
816
817
    /**
818
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
819
     *
820
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
821
     *
822
     * @param string $body         <p>The original string to be split.</p>
823
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
824
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
825
     *
826
     * @psalm-pure
827
     *
828
     * @return string
829
     *                <p>The chunked string.</p>
830
     */
831 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Cut the text into chunks first ...
        $chunks = self::str_split($body, $chunk_length);

        // ... then glue them together; $end only goes between chunks, not after the last one.
        return \implode($end, $chunks);
    }
835
836
    /**
837
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
838
     *
839
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
840
     *
841
     * @param string $str                                     <p>The string to be sanitized.</p>
842
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
843
     *                                                        UTF-BOM.</p>
844
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
845
     *                                                        whitespace.</p>
846
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
847
     *                                                        Word chars e.g.: "…"
848
     *                                                        => "..."</p>
849
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
850
     *                                                        in
851
     *                                                        combination with
852
     *                                                        $normalize_whitespace</p>
853
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
854
     *                                                        question mark e.g.: "�"</p>
855
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
856
     *                                                        invisible characters e.g.: "\0"</p>
857
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you want to remove
858
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
859
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
860
     *                                                        </p>
861
     *
862
     * @psalm-pure
863
     *
864
     * @return string
865
     *                <p>A clean UTF-8 encoded string.</p>
866
     *
867
     * @noinspection PhpTooManyParametersInspection
868
     */
869 89
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences, groups 2 and 3 catch
        // single stray bytes. Replacing every match with "$1" keeps the
        // well-formed runs and drops the stray bytes.
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_filter, '$1', $str);

        // The optional steps below run in a fixed order, each one only if its flag is set.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
918
919
    /**
     * Clean-up a string so that only printable UTF-8 chars remain + fix UTF-8 encoding.
     *
     * The input is cast to string, passed through UTF8::fix_simple_utf8() and then
     * through UTF8::clean() with: BOM removal, whitespace normalization (keeping
     * non-breaking spaces) and diamond-question-mark replacement switched on.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for empty input.</p>
     */
    public static function cleanup($str): string
    {
        // the parameter is untyped, so normalize it to a string first
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // flags for UTF8::clean(), in parameter order
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        // invisible characters (e.g. "\0") are removed via the default of clean()
        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
956
957
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * A string is first split via UTF8::str_split(); every element is then mapped
     * through UTF8::ord() and, if requested, through UTF8::int_to_hex().
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return array<int|string>
     *                           <p>
     *                           The array of code points:<br>
     *                           array<int> for $use_u_style === false<br>
     *                           array<string> for $use_u_style === true<br>
     *                           An empty array if the input is neither a string nor an array,
     *                           or if there is nothing to convert.
     *                           </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($arg) || $arg === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $arg);

        return $use_u_style
            ? \array_map([self::class, 'int_to_hex'], $code_points)
            : $code_points;
    }
1018
1019
    /**
     * Replaces every run of whitespace (POSIX class [[:space:]]) with a single
     * space and trims the result.
     *
     * Uses mb_ereg_replace() when mbstring support is flagged as available,
     * otherwise falls back to UTF8::regex_replace().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $condensed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($condensed);
    }
1040
1041
    /**
     * Returns count of characters used in a string.
     *
     * The string is split into single characters via UTF8::str_split() and the
     * occurrences of each character are counted.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Forwarded unchanged to UTF8::str_split().</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // split into chunks of exactly one character
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
1070
1071
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&past from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string               $str         <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param array<string,string> $filter      <p>Map of "search => replacement" applied before invalid characters are stripped.</p>
     * @param bool                 $stripe_tags <p>If true, the string is passed through strip_tags() first.</p>
     * @param bool                 $strtolower  <p>If true, the string is passed through strtolower().</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $stripe_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: blank input gets a generated identifier, everything else is cleaned.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($stripe_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $protected_double_underscores = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $protected_double_underscores);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Restore the temporary placeholder '##' only if the string really
        // contained '__' before the filter was applied.
        if ($protected_double_underscores > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher (this pattern keeps them up to U+FFFF)
        // We strip out any character not in the above list.
        $str = (string) \preg_replace(
            '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u',
            '',
            $str
        );

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $str = (string) \preg_replace('/^[0-9]/', '_', $str);
        $str = (string) \preg_replace('/^(-[0-9])|^(--)/', '__', $str);

        return \trim($str, '-');
    }
1144
1145
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the internal pattern is removed
     * from the given string; text outside of such blocks is left untouched.
     *
     * @param string $str <p>The css source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The css source without the matched media-queries.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
1162
1163
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1175
1176
    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<value>;") which is
     * then decoded via UTF8::html_entity_decode().
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @psalm-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = '&#' . $int . ';';
        $decode_flags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($numeric_entity, $decode_flags);
    }
1195
1196
    /**
     * Decodes a MIME header field
     *
     * Decoding is done by iconv_mime_decode() in "continue on error" mode.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>The target charset; anything other than "UTF-8" / "CP850"
     *                         is normalized via UTF8::normalize_encoding() first.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $decoded = \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);

        return $decoded;
    }
1217
1218
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * Each letter A-Z is mapped onto the matching "regional indicator symbol"
     * (U+1F1E6 - U+1F1FF); the two symbols together are rendered as a flag.
     * The input is matched case-insensitively.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
     *
     * @return string
     *                <p>Emoji or empty string on error (anything that is not exactly two ASCII letters).</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only exactly two ASCII letters can be mapped. This rejects the empty string,
        // wrong lengths, digits / punctuation and multi-byte characters, which would
        // otherwise end up as arbitrary code points because the code below indexes bytes.
        // "\A ... \z" is used (instead of "^ ... $") so that a trailing newline is rejected too.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // U+1F1E6 is "REGIONAL INDICATOR SYMBOL LETTER A", 0x41 is the ASCII letter "A"
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1246
1247
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // make sure the emoji lookup tables are available
        self::initEmojiData();

        // pick the key table that matches the mapping used while encoding
        $emoji_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $emoji_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1287
1288
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // make sure the emoji lookup tables are available
        self::initEmojiData();

        // pick the replacement table: reversible placeholders or plain names
        $emoji_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $emoji_keys,
            $str
        );
    }
1328
1329
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets, the pseudo encodings "JSON", "BASE64" and
     * "HTML-ENTITIES" are handled as source and as target.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the source encoding is detected from
     *                                              the string and a successful detection overrides
     *                                              $from_encoding; if the detection fails, the string is
     *                                              converted via UTF8::to_utf8() and returned.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the target is "JSON" and the string can not be json-encoded
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to convert
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are already in normalized form
        $normalized_names = ['UTF-8', 'CP850'];

        if (!\in_array($to_encoding, $normalized_names, true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, $normalized_names, true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // source and target are explicitly the same
        if ($from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: the target is handled first, then the source is decoded
        // --- and the source encoding is reset so that it gets detected below

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): strict base64_decode() yields false for invalid input - confirm callers expect that
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // --- detect the source encoding, if requested or if it is still unknown

        $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // --- conversions that are handled by this class itself

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- everything else needs mbstring or iconv

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted_by_mbstring = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            if ($converted_by_mbstring) {
                return $converted_by_mbstring;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $converted_by_iconv = @\iconv($from_encoding, $to_encoding, $str);

        // last resort: hand back the (possibly decoded) input unchanged
        return $converted_by_iconv !== false ? $converted_by_iconv : $str;
    }
1504
1505
    /**
     * Encodes a string as a MIME header field via iconv_mime_encode().
     *
     * The field name passed to iconv_mime_encode() is an empty string.
     *
     * @param string $str
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding ("scheme" option).</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed ("line-break-chars" option).</p>
     * @param int    $indent            [optional] <p>Set the max line length ("line-length" option).</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are already in normalized form
        $normalized_names = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $normalized_names, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $normalized_names, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $mime_options = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $mime_options);
    }
1548
1549
    /**
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
     *
     * The extract is cut at word / sentence boundaries: the nearest space or period
     * at or after the computed offset. Skipped text is marked with
     * $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The extract; the unchanged input if no boundary was found; an empty string for
     *                empty input or if a sub-string could not be created.</p>
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // for "UTF-8" the native "mb_*" functions are called directly
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // Position of the first space or period at / after $offset, 0 if there is none.
        //
        // A plain min() over both lookups is not enough: a failed lookup yields false,
        // false compares lower than every integer, and so a text with spaces but
        // without any period would never get a boundary.
        $first_boundary_from = static function (int $offset) use ($str, $encoding, $is_utf8): int {
            if ($is_utf8) {
                $pos_space = \mb_strpos($str, ' ', $offset);
                $pos_period = \mb_strpos($str, '.', $offset);
            } else {
                $pos_space = self::strpos($str, ' ', $offset, $encoding);
                $pos_period = self::strpos($str, '.', $offset, $encoding);
            }

            if ($pos_space === false) {
                return (int) $pos_period;
            }

            if ($pos_period === false) {
                return (int) $pos_space;
            }

            return (int) \min($pos_space, $pos_period);
        };

        // --- no search-string: cut the beginning of the text at the next boundary

        if ($search === '') {
            if ($length > 0) {
                if ($is_utf8) {
                    $string_length = (int) \mb_strlen($str);
                } else {
                    $string_length = (int) self::strlen($str, $encoding);
                }
                // never start the lookup behind the end of the string
                $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = $first_boundary_from($end);

            if ($pos) {
                if ($is_utf8) {
                    $str_sub = \mb_substr($str, 0, $pos);
                } else {
                    $str_sub = self::substr($str, 0, $pos, $encoding);
                }

                if ($str_sub === false) {
                    return '';
                }

                return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
            }

            return $str;
        }

        // --- with search-string: try to place the hit in the middle of the extract

        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // start of the extract: the last boundary within the text before the hit
        $pos_start = 0;
        if ($half_side > 0) {
            if ($is_utf8) {
                $half_text = \mb_substr($str, 0, $half_side);
            } else {
                $half_text = self::substr($str, 0, $half_side, $encoding);
            }
            if ($half_text !== false) {
                // max() copes with failed lookups: false never beats a found position
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($word_position && $half_side > 0) {
            $offset = $pos_start + $length - 1;
            $real_length = (int) self::strlen($str, $encoding);

            if ($offset > $real_length) {
                $offset = $real_length;
            }

            // length of the extract, counted from $pos_start
            $pos_end = $first_boundary_from($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no boundary behind the hit: take everything up to the end of the text
                if ($is_utf8) {
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
                } else {
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
                }
                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
                } else {
                    $extract = '';
                }
            } else {
                if ($is_utf8) {
                    $str_sub = \mb_substr($str, $pos_start, $pos_end);
                } else {
                    $str_sub = self::substr($str, $pos_start, $pos_end, $encoding);
                }
                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            }
        } else {
            // the hit is (close to) the beginning of the text: cut only the end
            $offset = $length - 1;
            $true_length = (int) self::strlen($str, $encoding);

            if ($offset > $true_length) {
                $offset = $true_length;
            }

            $pos_end = $first_boundary_from($offset);

            if ($pos_end) {
                if ($is_utf8) {
                    $str_sub = \mb_substr($str, 0, $pos_end);
                } else {
                    $str_sub = self::substr($str, 0, $pos_end, $encoding);
                }
                if ($str_sub !== false) {
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1738
1739
    /**
1740
     * Reads entire file into a string.
1741
     *
1742
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
1743
     *
1744
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1745
     *
1746
     * @see http://php.net/manual/en/function.file-get-contents.php
1747
     *
1748
     * @param string        $filename         <p>
1749
     *                                        Name of the file to read.
1750
     *                                        </p>
1751
     * @param bool          $use_include_path [optional] <p>
1752
     *                                        Prior to PHP 5, this parameter is called
1753
     *                                        use_include_path and is a bool.
1754
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1755
     *                                        to trigger include path
1756
     *                                        search.
1757
     *                                        </p>
1758
     * @param resource|null $context          [optional] <p>
1759
     *                                        A valid context resource created with
1760
     *                                        stream_context_create. If you don't need to use a
1761
     *                                        custom context, you can skip this parameter by &null;.
1762
     *                                        </p>
1763
     * @param int|null      $offset           [optional] <p>
1764
     *                                        The offset where the reading starts.
1765
     *                                        </p>
1766
     * @param int|null      $max_length       [optional] <p>
1767
     *                                        Maximum length of data read. The default is to read until end
1768
     *                                        of file is reached.
1769
     *                                        </p>
1770
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1771
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1772
     *                                        some files, because they used non default utf-8 chars. Binary files
1773
     *                                        like images or pdf will not be converted.</p>
1774
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1775
     *                                        A empty string will trigger the autodetect anyway.</p>
1776
     *
1777
     * @psalm-pure
1778
     *
1779
     * @return false|string
1780
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1781
     *
1782
     * @noinspection PhpTooManyParametersInspection
1783
     */
1784 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the path first; a failed sanitization is reported like a failed read.
        $sanitized_filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($sanitized_filename === false) {
            return false;
        }

        // Build a default stream context carrying the HTTP timeout, but only when
        // the caller did not pass a context and the timeout is non-zero.
        if ($context === null && $timeout) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        $start_offset = $offset ?? 0;

        // The length argument is only forwarded when it was explicitly given.
        $data = \is_int($max_length)
            ? \file_get_contents($sanitized_filename, $use_include_path, $context, $start_offset, $max_length)
            : \file_get_contents($sanitized_filename, $use_include_path, $context, $start_offset);

        // Propagate a failed read unchanged.
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Convert when the content is not detected as binary, or when it is
        // detected as UTF-16 / UTF-32.
        $should_convert = !self::is_binary($data, true)
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($should_convert) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1841
1842
    /**
1843
     * Checks if a file starts with BOM (Byte Order Mark) character.
1844
     *
1845
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
1846
     *
1847
     * @param string $file_path <p>Path to a valid file.</p>
1848
     *
1849
     * @throws \RuntimeException if file_get_contents() returned false
1850
     *
1851
     * @return bool
1852
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1853
     *
1854
     * @psalm-pure
1855
     */
1856 2
    public static function file_has_bom(string $file_path): bool
    {
        $raw_content = \file_get_contents($file_path);

        // Happy path first: the file was readable, so inspect its content.
        if ($raw_content !== false) {
            return self::string_has_bom($raw_content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1865
1866
    /**
1867
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1868
     *
1869
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
1870
     *
1871
     * @param array|object|string $var
1872
     * @param int                 $normalization_form
1873
     * @param string              $leading_combining
1874
     *
1875
     * @psalm-pure
1876
     *
1877
     * @return mixed
1878
     *
1879
     * @template TFilter
1880
     * @psalm-param TFilter $var
1881
     * @psalm-return TFilter
1882
     */
1883 65
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $var_type = \gettype($var);

        if ($var_type === 'object' || $var_type === 'array') {
            // Filter every element (or iterable property) in place, recursively.
            foreach ($var as &$element) {
                $element = self::filter($element, $normalization_form, $leading_combining);
            }
            unset($element);
        } elseif ($var_type === 'string') {
            // Line endings are only normalized when a carriage return is present.
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // Pure ASCII input needs no Unicode normalization at all.
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // Non-empty marker: "already normalized" counts as a usable result below.
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // If the normalizer produced no usable string, re-encode the input instead.
                    $var = isset($normalized[0])
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }
        // Every other type is returned untouched.

        /** @noinspection PhpSillyAssignmentInspection */
        /** @psalm-var TFilter $var */
        $var = $var;

        return $var;
    }
1941
1942
    /**
1943
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1944
     *
1945
     * Gets a specific external variable by name and optionally filters it.
1946
     *
1947
     * EXAMPLE: <code>
1948
     * // _GET['foo'] = 'bar';
1949
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING)); // 'bar'
1950
     * </code>
1951
     *
1952
     * @see http://php.net/manual/en/function.filter-input.php
1953
     *
1954
     * @param int       $type          <p>
1955
     *                                 One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1956
     *                                 <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1957
     *                                 <b>INPUT_ENV</b>.
1958
     *                                 </p>
1959
     * @param string    $variable_name <p>
1960
     *                                 Name of a variable to get.
1961
     *                                 </p>
1962
     * @param int       $filter        [optional] <p>
1963
     *                                 The ID of the filter to apply. The
1964
     *                                 manual page lists the available filters.
1965
     *                                 </p>
1966
     * @param array|int $options       [optional] <p>
1967
     *                                 Associative array of options or bitwise disjunction of flags. If filter
1968
     *                                 accepts options, flags can be provided in "flags" field of array.
1969
     *                                 </p>
1970
     *
1971
     * @psalm-pure
1972
     *
1973
     * @return mixed
1974
     *               <p>
1975
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1976
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1977
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1978
     *               </p>
1979
     */
1980 1
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        // Only hand the options over to the native function when they were really supplied.
        $raw_value = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw_value);
    }
1997
1998
    /**
1999
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2000
     *
2001
     * Gets external variables and optionally filters them.
2002
     *
2003
     * EXAMPLE: <code>
2004
     * // _GET['foo'] = 'bar';
2005
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_SANITIZE_STRING')); // array('bar')
2006
     * </code>
2007
     *
2008
     * @see http://php.net/manual/en/function.filter-input-array.php
2009
     *
2010
     * @param int        $type       <p>
2011
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
2012
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
2013
     *                               <b>INPUT_ENV</b>.
2014
     *                               </p>
2015
     * @param array|null $definition [optional] <p>
2016
     *                               An array defining the arguments. A valid key is a string
2017
     *                               containing a variable name and a valid value is either a filter type, or an array
2018
     *                               optionally specifying the filter, flags and options. If the value is an
2019
     *                               array, valid keys are filter which specifies the
2020
     *                               filter type,
2021
     *                               flags which specifies any flags that apply to the
2022
     *                               filter, and options which specifies any options that
2023
     *                               apply to the filter. See the example below for a better understanding.
2024
     *                               </p>
2025
     *                               <p>
2026
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
2027
     *                               input array are filtered by this filter.
2028
     *                               </p>
2029
     * @param bool       $add_empty  [optional] <p>
2030
     *                               Add missing keys as <b>NULL</b> to the return value.
2031
     *                               </p>
2032
     *
2033
     * @psalm-pure
2034
     *
2035
     * @return mixed
2036
     *               <p>
2037
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2038
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2039
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
2040
     *               is not set and <b>NULL</b> if the filter fails.
2041
     *               </p>
2042
     */
2043 1
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        // Without a definition the native function is called with the input type only.
        $raw_values = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($raw_values);
    }
2059
2060
    /**
2061
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2062
     *
2063
     * Filters a variable with a specified filter.
2064
     *
2065
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
2066
     *
2067
     * @see http://php.net/manual/en/function.filter-var.php
2068
     *
2069
     * @param float|int|string|null $variable <p>
2070
     *                                        Value to filter.
2071
     *                                        </p>
2072
     * @param int                   $filter   [optional] <p>
2073
     *                                        The ID of the filter to apply. The
2074
     *                                        manual page lists the available filters.
2075
     *                                        </p>
2076
     * @param array|int             $options  [optional] <p>
2077
     *                                        Associative array of options or bitwise disjunction of flags. If filter
2078
     *                                        accepts options, flags can be provided in "flags" field of array. For
2079
     *                                        the "callback" filter, callable type should be passed. The
2080
     *                                        callback must accept one argument, the value to be filtered, and return
2081
     *                                        the value after filtering/sanitizing it.
2082
     *                                        </p>
2083
     *                                        <p>
2084
     *                                        <code>
2085
     *                                        // for filters that accept options, use this format
2086
     *                                        $options = array(
2087
     *                                        'options' => array(
2088
     *                                        'default' => 3, // value to return if the filter fails
2089
     *                                        // other options here
2090
     *                                        'min_range' => 0
2091
     *                                        ),
2092
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
2093
     *                                        );
2094
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
2095
     *                                        // for filter that only accept flags, you can pass them directly
2096
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
2097
     *                                        // for filter that only accept flags, you can also pass as an array
2098
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
2099
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
2100
     *                                        // callback validate filter
2101
     *                                        function foo($value)
2102
     *                                        {
2103
     *                                        // Expected format: Surname, GivenNames
2104
     *                                        if (strpos($value, ", ") === false) return false;
2105
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
2106
     *                                        $empty = (empty($surname) || empty($givennames));
2107
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
2108
     *                                        if ($empty || $notstrings) {
2109
     *                                        return false;
2110
     *                                        } else {
2111
     *                                        return $value;
2112
     *                                        }
2113
     *                                        }
2114
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
2115
     *                                        </code>
2116
     *                                        </p>
2117
     *
2118
     * @psalm-pure
2119
     *
2120
     * @return mixed
2121
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
2122
     */
2123 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // Treat an explicit null like "no options given" (same guard as
            // filter_input()): the native filter_var() declares its options as
            // array|int, so forwarding null would be rejected under strict types.
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        // Normalize whatever the native filter returned.
        return self::filter($variable);
    }
2139
2140
    /**
2141
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2142
     *
2143
     * Gets multiple variables and optionally filters them.
2144
     *
2145
     * EXAMPLE: <code>
2146
     * $filters = [
2147
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2148
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2149
     *     'email' => FILTER_VALIDATE_EMAIL,
2150
     * ];
2151
     *
2152
     * $data = [
2153
     *     'name' => 'κόσμε',
2154
     *     'age' => '18',
2155
     *     'email' => 'user@example.com'
2156
     * ];
2157
     *
2158
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
2159
     * </code>
2160
     *
2161
     * @see http://php.net/manual/en/function.filter-var-array.php
2162
     *
2163
     * @param array<mixed> $data       <p>
2164
     *                                 An array with string keys containing the data to filter.
2165
     *                                 </p>
2166
     * @param array|int    $definition [optional] <p>
2167
     *                                 An array defining the arguments. A valid key is a string
2168
     *                                 containing a variable name and a valid value is either a
2169
     *                                 filter type, or an
2170
     *                                 array optionally specifying the filter, flags and options.
2171
     *                                 If the value is an array, valid keys are filter
2172
     *                                 which specifies the filter type,
2173
     *                                 flags which specifies any flags that apply to the
2174
     *                                 filter, and options which specifies any options that
2175
     *                                 apply to the filter. See the example below for a better understanding.
2176
     *                                 </p>
2177
     *                                 <p>
2178
     *                                 This parameter can be also an integer holding a filter constant. Then all values
2179
     *                                 in the input array are filtered by this filter.
2180
     *                                 </p>
2181
     * @param bool         $add_empty  [optional] <p>
2182
     *                                 Add missing keys as <b>NULL</b> to the return value.
2183
     *                                 </p>
2184
     *
2185
     * @psalm-pure
2186
     *
2187
     * @return mixed
2188
     *               <p>
2189
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2190
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2191
     *               set.
2192
     *               </p>
2193
     */
2194 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            // Treat an explicit null like "no definition given" (same guard as
            // filter_input_array()): the native filter_var_array() declares this
            // argument as array|int, so null must not be forwarded to it.
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        // Normalize whatever the native filter returned.
        return self::filter($a);
    }
2210
2211
    /**
2212
     * Checks whether finfo is available on the server.
2213
     *
2214
     * @psalm-pure
2215
     *
2216
     * @return bool
2217
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2218
     */
2219
    public static function finfo_loaded(): bool
    {
        // Availability is decided solely by the presence of the "finfo" class.
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2223
2224
    /**
2225
     * Returns the first $n characters of the string.
2226
     *
2227
     * @param string $str      <p>The input string.</p>
2228
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2229
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2230
     *
2231
     * @psalm-pure
2232
     *
2233
     * @return string
2234
     */
2235 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to extract for a non-positive count or an empty input.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; other encodings use the class' own substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2250
2251
    /**
2252
     * Check if the number of Unicode characters isn't greater than the specified integer.
2253
     *
2254
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // false</code>
2255
     *
2256
     * @param string $str      the original string to be checked
2257
     * @param int    $box_size the size in number of chars to be checked against string
2258
     *
2259
     * @psalm-pure
2260
     *
2261
     * @return bool
2262
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2263
     */
2264 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // The length is cast to int before it is compared against the box size.
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2268
2269
    /**
2270
     * Try to fix simple broken UTF-8 strings.
2271
     *
2272
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2273
     *
2274
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
2275
     *
2276
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2277
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2278
     * See: http://en.wikipedia.org/wiki/Windows-1252
2279
     *
2280
     * @param string $str <p>The input string</p>
2281
     *
2282
     * @psalm-pure
2283
     *
2284
     * @return string
2285
     */
2286 47
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Search strings (the keys of the fix table), built once per request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * Replacement strings (the fix table itself), built once per request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        if ($search_cache === null) {
            // Load the fix table lazily, then derive both lookup lists from it.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $fix_table = self::$BROKEN_UTF8_FIX;
            $search_cache = \array_keys($fix_table);
            $replace_cache = $fix_table;
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }
2319
2320
    /**
2321
     * Fix a double (or multiple) encoded UTF8 string.
2322
     *
2323
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
2324
     *
2325
     * @param string|string[] $str you can use a string or an array of strings
2326
     *
2327
     * @psalm-pure
2328
     *
2329
     * @return string|string[]
2330
     *                         Will return the fixed input-"array" or
2331
     *                         the fixed input-"string"
2332
     *
2333
     * @psalm-suppress InvalidReturnType
2334
     */
2335 2
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // Fix every entry in place; nested arrays are handled by the recursion.
            foreach ($str as &$entry) {
                $entry = self::fix_utf8($entry);
            }
            unset($entry);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $fixed = (string) $str;
        $previous = '';

        // Decode and re-encode repeatedly until a pass no longer changes the string.
        while ($fixed !== $previous) {
            $previous = $fixed;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $fixed = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $fixed;
    }
2366
2367
    /**
     * Get the text direction of a specific character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * @param string $char <p>The character to inspect.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // numeric values of the "IntlChar::CHAR_DIRECTION_*" constants:
            // 0 = LEFT_TO_RIGHT, 11 = LEFT_TO_RIGHT_EMBEDDING, 12 = LEFT_TO_RIGHT_OVERRIDE, 20 = LEFT_TO_RIGHT_ISOLATE
            if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            // 1 = RIGHT_TO_LEFT, 13 = RIGHT_TO_LEFT_ARABIC, 14 = RIGHT_TO_LEFT_EMBEDDING,
            // 15 = RIGHT_TO_LEFT_OVERRIDE, 21 = RIGHT_TO_LEFT_ISOLATE
            if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // every other result (neutral / weak classes or no result at all)
            // is decided by the code point tables below
        }

        // the class is final, so "self::" is used (as everywhere else in this class) instead of "static::"
        $c = self::chr_to_decimal($char);

        // fast exit: no right-to-left entry exists outside of this code point window
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // isolated right-to-left code points (compared strictly)
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtl_code_points, true)) {
            return 'RTL';
        }

        // inclusive right-to-left code point ranges: [first, last]
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2485
2486
    /**
     * Check for php-support.
     *
     * @param string|null $key <p>Name of a single support-flag, or null for the whole list.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // load the transliterator list only once and only when a single key is requested
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        // without a key the support-array is returned as it currently is
        return self::$SUPPORT;
    }
2512
2513
    /**
     * Detect the file type from the first two bytes of the given data.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return array<string, string|null>
     *                       <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // an empty string or a single byte cannot carry one of the known signatures
        if (!isset($str[1])) {
            return $fallback;
        }

        $signature = $str[0] . $str[1];

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // pdf, exe, midi, zip, rar, gif, tiff, bmp, ...
        //

        // bytes 255 + 216
        if ($signature === "\xFF\xD8") {
            return [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ];
        }

        // bytes 137 + 80
        if ($signature === "\x89\x50") {
            return [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ];
        }

        return $fallback;
    }
2594
2595
    /**
     * Build a random string out of the given characters.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if there are no possible chars
     *                or if the length is not positive.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // the plain "UTF-8" case uses the "mb_*" functions directly,
        // every other encoding goes through the helpers of this class
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            // pass the encoding explicitly, so that the result does not depend on "mb_internal_encoding()"
            $max_length = (int) \mb_strlen($possible_chars, 'UTF-8');
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $max_length = (int) self::strlen($possible_chars, $encoding);
        }

        if ($max_length === 0) {
            return '';
        }

        //
        // add random chars
        //

        $str = '';
        $i = 0;
        while ($i < $length) {
            try {
                $rand_int = \random_int(0, $max_length - 1);
            } catch (\Exception $e) {
                // fallback if no source of randomness is available
                /** @noinspection RandomApiMigrationInspection */
                $rand_int = \mt_rand(0, $max_length - 1);
            }

            if ($is_utf8) {
                $char = \mb_substr($possible_chars, $rand_int, 1, 'UTF-8');
            } else {
                $char = self::substr($possible_chars, $rand_int, 1, $encoding);
            }

            // only count successfully extracted chars
            if ($char !== false) {
                $str .= $char;
                ++$i;
            }
        }

        return $str;
    }
2659
2660
    /**
     * Build a unique string from a random number, the session-id, the
     * remote- and server-address (if available) and the given extra entropy.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_number = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_number = \mt_rand(0, \mt_getrandmax());
        }

        // collect everything that is used as prefix for "uniqid()"
        $seed = $random_number . \session_id();
        $seed .= $_SERVER['REMOTE_ADDR'] ?? '';
        $seed .= $_SERVER['SERVER_ADDR'] ?? '';
        $seed .= $extra_entropy;

        $unique = \uniqid($seed, true);

        if (!$use_md5) {
            return $unique;
        }

        return \md5($unique . $seed);
    }
2689
2690
    /**
     * Deprecated alias: forwards the call to "UTF8::string_has_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The unchanged result of "UTF8::string_has_bom()".</p>
     *
     * @see        UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2706
2707
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        // anything followed by one char of the "lower" class
        $pattern = '.*[[:lower:]]';

        // without "mbstring" the pattern helper of this class is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2726
2727
    /**
     * Returns true if the string contains whitespace, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        // anything followed by one char of the "space" class
        $pattern = '.*[[:space:]]';

        // without "mbstring" the pattern helper of this class is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2746
2747
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        // anything followed by one char of the "upper" class
        $pattern = '.*[[:upper:]]';

        // without "mbstring" the pattern helper of this class is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2765
2766
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value, every non-hexadecimal char (e.g. the "U+" prefix) is ignored.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // "hexdec()" ignores non-hexadecimal chars anyway, but it reports them since PHP 7.4:
        // remove them up front instead of silencing the error via the "@"-operator
        $hex_digits = (string) \preg_replace('/[^0-9a-fA-F]/', '', $hexdec);

        return self::decimal_to_chr((int) \hexdec($hex_digits));
    }
2784
2785
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * @param string $hexdec <p>The hexadecimal code point representation:
     *                       4 to 6 hexadecimal digits, optionally prefixed by "\u" or "U+".</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // only real hexadecimal digits are accepted: something like "U+zzzz"
        // is a failure and must not be converted into a (wrong) code point
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2814
2815
    /**
     * Deprecated alias: forwards all arguments to "UTF8::html_entity_decode()".
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>Passed through unchanged.</p>
     * @param string $encoding [optional] <p>Passed through unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2836
2837
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "keep ascii" only code points from 0x80 upwards are encoded
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // convmap: [start code, end code, offset, mask]
            // INFO: the number of elements must be a multiple of 4, otherwise PHP >= 8 throws a "ValueError"
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            // the encoding is always passed, so that the result does not depend on "mb_internal_encoding()"
            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2908
2909
    /**
2910
     * UTF-8 version of html_entity_decode()
2911
     *
2912
     * The reason we are not using html_entity_decode() by itself is because
2913
     * while it is not technically correct to leave out the semicolon
2914
     * at the end of an entity most browsers will still interpret the entity
2915
     * correctly. html_entity_decode() does not convert entities without
2916
     * semicolons, so we are left with our own little solution here. Bummer.
2917
     *
2918
     * Convert all HTML entities to their applicable characters.
2919
     *
2920
     * INFO: opposite to UTF8::html_encode()
2921
     *
2922
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2923
     *
2924
     * @see http://php.net/manual/en/function.html-entity-decode.php
2925
     *
2926
     * @param string $str      <p>
2927
     *                         The input string.
2928
     *                         </p>
2929
     * @param int    $flags    [optional] <p>
2930
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2931
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2932
     *                         <table>
2933
     *                         Available <i>flags</i> constants
2934
     *                         <tr valign="top">
2935
     *                         <td>Constant Name</td>
2936
     *                         <td>Description</td>
2937
     *                         </tr>
2938
     *                         <tr valign="top">
2939
     *                         <td><b>ENT_COMPAT</b></td>
2940
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2941
     *                         </tr>
2942
     *                         <tr valign="top">
2943
     *                         <td><b>ENT_QUOTES</b></td>
2944
     *                         <td>Will convert both double and single quotes.</td>
2945
     *                         </tr>
2946
     *                         <tr valign="top">
2947
     *                         <td><b>ENT_NOQUOTES</b></td>
2948
     *                         <td>Will leave both double and single quotes unconverted.</td>
2949
     *                         </tr>
2950
     *                         <tr valign="top">
2951
     *                         <td><b>ENT_HTML401</b></td>
2952
     *                         <td>
2953
     *                         Handle code as HTML 4.01.
2954
     *                         </td>
2955
     *                         </tr>
2956
     *                         <tr valign="top">
2957
     *                         <td><b>ENT_XML1</b></td>
2958
     *                         <td>
2959
     *                         Handle code as XML 1.
2960
     *                         </td>
2961
     *                         </tr>
2962
     *                         <tr valign="top">
2963
     *                         <td><b>ENT_XHTML</b></td>
2964
     *                         <td>
2965
     *                         Handle code as XHTML.
2966
     *                         </td>
2967
     *                         </tr>
2968
     *                         <tr valign="top">
2969
     *                         <td><b>ENT_HTML5</b></td>
2970
     *                         <td>
2971
     *                         Handle code as HTML 5.
2972
     *                         </td>
2973
     *                         </tr>
2974
     *                         </table>
2975
     *                         </p>
2976
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2977
     *
2978
     * @psalm-pure
2979
     *
2980
     * @return string the decoded string
2981
     */
2982 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // input with less than four bytes is returned untouched, examples: &; || &x;
        if (!isset($str[3])) {
            return $str;
        }

        // no "&", nothing to decode
        if (\strpos($str, '&') === false) {
            return $str;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // default: decode both quote types and handle the code as HTML 5
        $flags = $flags ?? (\ENT_QUOTES | \ENT_HTML5);

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again until a pass does not change the string anymore,
        // so that nested entities (e.g. "&amp;lt;") are fully resolved
        $previous = null;
        while ($previous !== $str && \strpos($str, '&') !== false) {
            $previous = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // add the missing semicolon at the end of numeric entities
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        }

        return $str;
    }
3041
3042
    /**
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
     *
     * INFO: the flags "ENT_QUOTES | ENT_SUBSTITUTE" are always used
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3060
3061
    /**
     * Remove empty html-tags: an opening tag that is directly followed
     * (optionally separated by whitespace only) by a closing tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // opening tag + optional whitespace + closing tag
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        return (string) $stripped;
    }
3080
3081
    /**
3082
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3083
     *
3084
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3085
     *
3086
     * @see http://php.net/manual/en/function.htmlentities.php
3087
     *
3088
     * @param string $str           <p>
3089
     *                              The input string.
3090
     *                              </p>
3091
     * @param int    $flags         [optional] <p>
3092
     *                              A bitmask of one or more of the following flags, which specify how to handle
3093
     *                              quotes, invalid code unit sequences and the used document type. The default is
3094
     *                              ENT_COMPAT | ENT_HTML401.
3095
     *                              <table>
3096
     *                              Available <i>flags</i> constants
3097
     *                              <tr valign="top">
3098
     *                              <td>Constant Name</td>
3099
     *                              <td>Description</td>
3100
     *                              </tr>
3101
     *                              <tr valign="top">
3102
     *                              <td><b>ENT_COMPAT</b></td>
3103
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3104
     *                              </tr>
3105
     *                              <tr valign="top">
3106
     *                              <td><b>ENT_QUOTES</b></td>
3107
     *                              <td>Will convert both double and single quotes.</td>
3108
     *                              </tr>
3109
     *                              <tr valign="top">
3110
     *                              <td><b>ENT_NOQUOTES</b></td>
3111
     *                              <td>Will leave both double and single quotes unconverted.</td>
3112
     *                              </tr>
3113
     *                              <tr valign="top">
3114
     *                              <td><b>ENT_IGNORE</b></td>
3115
     *                              <td>
3116
     *                              Silently discard invalid code unit sequences instead of returning
3117
     *                              an empty string. Using this flag is discouraged as it
3118
     *                              may have security implications.
3119
     *                              </td>
3120
     *                              </tr>
3121
     *                              <tr valign="top">
3122
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3123
     *                              <td>
3124
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3125
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3126
     *                              string.
3127
     *                              </td>
3128
     *                              </tr>
3129
     *                              <tr valign="top">
3130
     *                              <td><b>ENT_DISALLOWED</b></td>
3131
     *                              <td>
3132
     *                              Replace invalid code points for the given document type with a
3133
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3134
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3135
     *                              instance, to ensure the well-formedness of XML documents with
3136
     *                              embedded external content.
3137
     *                              </td>
3138
     *                              </tr>
3139
     *                              <tr valign="top">
3140
     *                              <td><b>ENT_HTML401</b></td>
3141
     *                              <td>
3142
     *                              Handle code as HTML 4.01.
3143
     *                              </td>
3144
     *                              </tr>
3145
     *                              <tr valign="top">
3146
     *                              <td><b>ENT_XML1</b></td>
3147
     *                              <td>
3148
     *                              Handle code as XML 1.
3149
     *                              </td>
3150
     *                              </tr>
3151
     *                              <tr valign="top">
3152
     *                              <td><b>ENT_XHTML</b></td>
3153
     *                              <td>
3154
     *                              Handle code as XHTML.
3155
     *                              </td>
3156
     *                              </tr>
3157
     *                              <tr valign="top">
3158
     *                              <td><b>ENT_HTML5</b></td>
3159
     *                              <td>
3160
     *                              Handle code as HTML 5.
3161
     *                              </td>
3162
     *                              </tr>
3163
     *                              </table>
3164
     *                              </p>
3165
     * @param string $encoding      [optional] <p>
3166
     *                              Like <b>htmlspecialchars</b>,
3167
     *                              <b>htmlentities</b> takes an optional third argument
3168
     *                              <i>encoding</i> which defines encoding used in
3169
     *                              conversion.
3170
     *                              Although this argument is technically optional, you are highly
3171
     *                              encouraged to specify the correct value for your code.
3172
     *                              </p>
3173
     * @param bool   $double_encode [optional] <p>
3174
     *                              When <i>double_encode</i> is turned off PHP will not
3175
     *                              encode existing html entities. The default is to convert everything.
3176
     *                              </p>
3177
     *
3178
     * @psalm-pure
3179
     *
3180
     * @return string
3181
     *                <p>
3182
     *                The encoded string.
3183
     *                <br><br>
3184
     *                If the input <i>string</i> contains an invalid code unit
3185
     *                sequence within the given <i>encoding</i> an empty string
3186
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3187
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3188
     *                </p>
3189
     */
3190 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; every other value is
        // normalized first (with "UTF-8" passed as the fallback argument).
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * The native htmlentities() leaves backslashes untouched, so they are
         * converted into their numeric HTML entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
3219
3220
    /**
3221
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3222
     *
3223
     * INFO: Take a look at "UTF8::htmlentities()"
3224
     *
3225
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3226
     *
3227
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3228
     *
3229
     * @param string $str           <p>
3230
     *                              The string being converted.
3231
     *                              </p>
3232
     * @param int    $flags         [optional] <p>
3233
     *                              A bitmask of one or more of the following flags, which specify how to handle
3234
     *                              quotes, invalid code unit sequences and the used document type. The default is
3235
     *                              ENT_COMPAT | ENT_HTML401.
3236
     *                              <table>
3237
     *                              Available <i>flags</i> constants
3238
     *                              <tr valign="top">
3239
     *                              <td>Constant Name</td>
3240
     *                              <td>Description</td>
3241
     *                              </tr>
3242
     *                              <tr valign="top">
3243
     *                              <td><b>ENT_COMPAT</b></td>
3244
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3245
     *                              </tr>
3246
     *                              <tr valign="top">
3247
     *                              <td><b>ENT_QUOTES</b></td>
3248
     *                              <td>Will convert both double and single quotes.</td>
3249
     *                              </tr>
3250
     *                              <tr valign="top">
3251
     *                              <td><b>ENT_NOQUOTES</b></td>
3252
     *                              <td>Will leave both double and single quotes unconverted.</td>
3253
     *                              </tr>
3254
     *                              <tr valign="top">
3255
     *                              <td><b>ENT_IGNORE</b></td>
3256
     *                              <td>
3257
     *                              Silently discard invalid code unit sequences instead of returning
3258
     *                              an empty string. Using this flag is discouraged as it
3259
     *                              may have security implications.
3260
     *                              </td>
3261
     *                              </tr>
3262
     *                              <tr valign="top">
3263
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3264
     *                              <td>
3265
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3266
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3267
     *                              string.
3268
     *                              </td>
3269
     *                              </tr>
3270
     *                              <tr valign="top">
3271
     *                              <td><b>ENT_DISALLOWED</b></td>
3272
     *                              <td>
3273
     *                              Replace invalid code points for the given document type with a
3274
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3275
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3276
     *                              instance, to ensure the well-formedness of XML documents with
3277
     *                              embedded external content.
3278
     *                              </td>
3279
     *                              </tr>
3280
     *                              <tr valign="top">
3281
     *                              <td><b>ENT_HTML401</b></td>
3282
     *                              <td>
3283
     *                              Handle code as HTML 4.01.
3284
     *                              </td>
3285
     *                              </tr>
3286
     *                              <tr valign="top">
3287
     *                              <td><b>ENT_XML1</b></td>
3288
     *                              <td>
3289
     *                              Handle code as XML 1.
3290
     *                              </td>
3291
     *                              </tr>
3292
     *                              <tr valign="top">
3293
     *                              <td><b>ENT_XHTML</b></td>
3294
     *                              <td>
3295
     *                              Handle code as XHTML.
3296
     *                              </td>
3297
     *                              </tr>
3298
     *                              <tr valign="top">
3299
     *                              <td><b>ENT_HTML5</b></td>
3300
     *                              <td>
3301
     *                              Handle code as HTML 5.
3302
     *                              </td>
3303
     *                              </tr>
3304
     *                              </table>
3305
     *                              </p>
3306
     * @param string $encoding      [optional] <p>
3307
     *                              Defines encoding used in conversion.
3308
     *                              </p>
3309
     *                              <p>
3310
     *                              For the purposes of this function, the encodings
3311
     *                              ISO-8859-1, ISO-8859-15,
3312
     *                              UTF-8, cp866,
3313
     *                              cp1251, cp1252, and
3314
     *                              KOI8-R are effectively equivalent, provided the
3315
     *                              <i>string</i> itself is valid for the encoding, as
3316
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3317
     *                              the same positions in all of these encodings.
3318
     *                              </p>
3319
     * @param bool   $double_encode [optional] <p>
3320
     *                              When <i>double_encode</i> is turned off PHP will not
3321
     *                              encode existing html entities, the default is to convert everything.
3322
     *                              </p>
3323
     *
3324
     * @psalm-pure
3325
     *
3326
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
3333
     */
3334 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; every other value is
        // normalized first (with "UTF-8" passed as the fallback argument).
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3351
3352
    /**
3353
     * Checks whether iconv is available on the server.
3354
     *
3355
     * @psalm-pure
3356
     *
3357
     * @return bool
3358
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3359
     */
3360
    public static function iconv_loaded(): bool
    {
        // Ask PHP whether the "iconv" extension is currently loaded.
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3364
3365
    /**
3366
     * alias for "UTF8::decimal_to_chr()"
3367
     *
3368
     * @param int|string $int
3369
     *
3370
     * @psalm-param int|numeric-string $int
3371
     *
3372
     * @psalm-pure
3373
     *
3374
     * @return string
3375
     *
3376
     * @see        UTF8::decimal_to_chr()
3377
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
3378
     */
3379 4
    public static function int_to_chr($int): string
    {
        // Deprecated alias: forwards the argument unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3383
3384
    /**
3385
     * Converts Integer to hexadecimal U+xxxx code point representation.
3386
     *
3387
     * INFO: opposite to UTF8::hex_to_int()
3388
     *
3389
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3390
     *
3391
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3392
     * @param string $prefix [optional]
3393
     *
3394
     * @psalm-pure
3395
     *
3396
     * @return string the prefixed code point, zero-padded to at least four hexadecimal digits
3397
     */
3398 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros to a minimum of four hex digits; longer values
        // are returned untouched.
        $padded_hex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $padded_hex;
    }
3406
3407
    /**
3408
     * Checks whether intl-char is available on the server.
3409
     *
3410
     * @psalm-pure
3411
     *
3412
     * @return bool
3413
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3414
     */
3415
    public static function intlChar_loaded(): bool
    {
        // The check is done via the presence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3419
3420
    /**
3421
     * Checks whether intl is available on the server.
3422
     *
3423
     * @psalm-pure
3424
     *
3425
     * @return bool
3426
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3427
     */
3428 5
    public static function intl_loaded(): bool
    {
        // Ask PHP whether the "intl" extension is currently loaded.
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3432
3433
    /**
3434
     * alias for "UTF8::is_ascii()"
3435
     *
3436
     * @param string $str
3437
     *
3438
     * @psalm-pure
3439
     *
3440
     * @return bool
3441
     *
3442
     * @see        UTF8::is_ascii()
3443
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
3444
     */
3445 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: the check itself is done by ASCII::is_ascii().
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3449
3450
    /**
3451
     * alias for "UTF8::is_base64()"
3452
     *
3453
     * @param string $str
3454
     *
3455
     * @psalm-pure
3456
     *
3457
     * @return bool
3458
     *
3459
     * @see        UTF8::is_base64()
3460
     * @deprecated <p>please use "UTF8::is_base64()"</p>
3461
     */
3462 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards to is_base64() with its default options.
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3466
3467
    /**
3468
     * alias for "UTF8::is_binary()"
3469
     *
3470
     * @param int|string $str
3471
     * @param bool       $strict
3472
     *
3473
     * @psalm-pure
3474
     *
3475
     * @return bool
3476
     *
3477
     * @see        UTF8::is_binary()
3478
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3479
     */
3480 4
    public static function isBinary($str, bool $strict = false): bool
    {
        // Deprecated alias: forwards both arguments unchanged to is_binary().
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3484
3485
    /**
3486
     * alias for "UTF8::is_bom()"
3487
     *
3488
     * @param string $utf8_chr
3489
     *
3490
     * @psalm-pure
3491
     *
3492
     * @return bool
3493
     *
3494
     * @see        UTF8::is_bom()
3495
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3496
     */
3497 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forwards the argument unchanged to is_bom().
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3501
3502
    /**
3503
     * alias for "UTF8::is_html()"
3504
     *
3505
     * @param string $str
3506
     *
3507
     * @psalm-pure
3508
     *
3509
     * @return bool
3510
     *
3511
     * @see        UTF8::is_html()
3512
     * @deprecated <p>please use "UTF8::is_html()"</p>
3513
     */
3514 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forwards the argument unchanged to is_html().
        $is_html = self::is_html($str);

        return $is_html;
    }
3518
3519
    /**
3520
     * alias for "UTF8::is_json()"
3521
     *
3522
     * @param string $str
3523
     *
3524
     * @return bool
3525
     *
3526
     * @see        UTF8::is_json()
3527
     * @deprecated <p>please use "UTF8::is_json()"</p>
3528
     */
3529 1
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forwards to is_json() with its default options.
        $is_json = self::is_json($str);

        return $is_json;
    }
3533
3534
    /**
3535
     * alias for "UTF8::is_utf16()"
3536
     *
3537
     * @param string $str
3538
     *
3539
     * @psalm-pure
3540
     *
3541
     * @return false|int
3542
     *                   <strong>false</strong> if it's not UTF-16,<br>
3543
     *                   <strong>1</strong> for UTF-16LE,<br>
3544
     *                   <strong>2</strong> for UTF-16BE
3545
     *
3546
     * @see        UTF8::is_utf16()
3547
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
3548
     */
3549 2
    public static function isUtf16($str)
    {
        // Deprecated alias: forwards to is_utf16() with its default options.
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3553
3554
    /**
3555
     * alias for "UTF8::is_utf32()"
3556
     *
3557
     * @param string $str
3558
     *
3559
     * @psalm-pure
3560
     *
3561
     * @return false|int
3562
     *                   <strong>false</strong> if it's not UTF-32,
3563
     *                   <strong>1</strong> for UTF-32LE,
3564
     *                   <strong>2</strong> for UTF-32BE
3565
     *
3566
     * @see        UTF8::is_utf32()
3567
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
3568
     */
3569 2
    public static function isUtf32($str)
    {
        // Deprecated alias: forwards to is_utf32() with its default options.
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3573
3574
    /**
3575
     * alias for "UTF8::is_utf8()"
3576
     *
3577
     * @param string $str
3578
     * @param bool   $strict
3579
     *
3580
     * @psalm-pure
3581
     *
3582
     * @return bool
3583
     *
3584
     * @see        UTF8::is_utf8()
3585
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3586
     */
3587 17
    public static function isUtf8($str, bool $strict = false): bool
    {
        // Deprecated alias: forwards both arguments unchanged to is_utf8().
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3591
3592
    /**
3593
     * Returns true if the string contains only alphabetic chars, false otherwise.
3594
     *
3595
     * @param string $str <p>The input string.</p>
3596
     *
3597
     * @psalm-pure
3598
     *
3599
     * @return bool
3600
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3601
     */
3602 10
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3611
3612
    /**
3613
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3614
     *
3615
     * @param string $str <p>The input string.</p>
3616
     *
3617
     * @psalm-pure
3618
     *
3619
     * @return bool
3620
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3621
     */
3622 13
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3631
3632
    /**
3633
     * Returns true if the string contains only punctuation chars, false otherwise.
3634
     *
3635
     * @param string $str <p>The input string.</p>
3636
     *
3637
     * @psalm-pure
3638
     *
3639
     * @return bool
3640
     *              <p>Whether or not $str contains only punctuation chars.</p>
3641
     */
3642 10
    public static function is_punctuation(string $str): bool
    {
        // The pattern also accepts the empty string (zero or more matches).
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3646
3647
    /**
3648
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3649
     *
3650
     * @param string $str <p>The input string.</p>
3651
     *
3652
     * @psalm-pure
3653
     *
3654
     * @return bool
3655
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3656
     */
3657 1
    public static function is_printable(string $str): bool
    {
        // The string counts as printable when removing invisible characters
        // leaves it unchanged.
        $cleaned = self::remove_invisible_characters($str);

        return $cleaned === $str;
    }
3661
3662
    /**
3663
     * Checks if a string is 7 bit ASCII.
3664
     *
3665
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3666
     *
3667
     * @param string $str <p>The string to check.</p>
3668
     *
3669
     * @psalm-pure
3670
     *
3671
     * @return bool
3672
     *              <p>
3673
     *              <strong>true</strong> if it is ASCII<br>
3674
     *              <strong>false</strong> otherwise
3675
     *              </p>
3676
     */
3677 8
    public static function is_ascii(string $str): bool
    {
        // The check itself is done by ASCII::is_ascii().
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3681
3682
    /**
3683
     * Returns true if the string is base64 encoded, false otherwise.
3684
     *
3685
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3686
     *
3687
     * @param string|null $str                   <p>The input string.</p>
3688
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3689
     *
3690
     * @psalm-pure
3691
     *
3692
     * @return bool
3693
     *              <p>Whether or not $str is base64 encoded.</p>
3694
     */
3695 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) is never base64.
        if (!\is_string($str)) {
            return false;
        }

        // The empty string is only accepted when explicitly allowed.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3713
3714
    /**
3715
     * Check if the input is binary (note: this is a heuristic and looks like a hack).
3716
     *
3717
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3718
     *
3719
     * @param int|string $input
3720
     * @param bool       $strict
3721
     *
3722
     * @psalm-pure
3723
     *
3724
     * @return bool
3725
     */
3726 40
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;

        // An empty input is never treated as binary.
        if ($input === '') {
            return false;
        }

        // A string made up only of "0" and "1" characters counts as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than a quarter NUL bytes counts as binary.
        $byte_length = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_length);
        if (($null_byte_count / $byte_length) > 0.25) {
            return true;
        }

        if (!$strict) {
            return false;
        }

        // Strict mode additionally asks ext-fileinfo for the MIME encoding.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $finfo_encoding === 'binary';
    }
3765
3766
    /**
3767
     * Check if the file is binary.
3768
     *
3769
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3770
     *
3771
     * @param string $file
3772
     *
3773
     * @return bool
3774
     */
3775 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // A file that cannot be opened is reported as "not binary".
        if (!\is_resource($handle)) {
            return false;
        }

        // Only the first 512 bytes are inspected.
        $first_bytes = \fread($handle, 512);
        \fclose($handle);

        if ($first_bytes === '' || $first_bytes === false) {
            return false;
        }

        return self::is_binary($first_bytes, true);
    }
3792
3793
    /**
3794
     * Returns true if the string contains only whitespace chars, false otherwise.
3795
     *
3796
     * @param string $str <p>The input string.</p>
3797
     *
3798
     * @psalm-pure
3799
     *
3800
     * @return bool
3801
     *              <p>Whether or not $str contains only whitespace characters.</p>
3802
     */
3803 15
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3812
3813
    /**
3814
     * Checks if the given string is equal to any "Byte Order Mark".
3815
     *
3816
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3817
     *
3818
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3819
     *
3820
     * @param string $str <p>The input string.</p>
3821
     *
3822
     * @psalm-pure
3823
     *
3824
     * @return bool
3825
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3826
     */
3827 2
    public static function is_bom($str): bool
    {
        // Only the keys of the BOM table (the BOM byte sequences) matter here.
        // The table is read without a by-reference loop, so no reference into
        // the shared static array is left behind. The comparison stays strict,
        // so a non-string input never matches.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3838
3839
    /**
3840
     * Determine whether the string is considered to be empty.
3841
     *
3842
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3843
     * empty() does not generate a warning if the variable does not exist.
3844
     *
3845
     * @param array|float|int|string $str
3846
     *
3847
     * @psalm-pure
3848
     *
3849
     * @return bool
3850
     *              <p>Whether or not $str is empty().</p>
3851
     */
3852 1
    public static function is_empty($str): bool
    {
        // For a parameter (always defined) empty() is the same as a negated
        // boolean cast.
        return !$str;
    }
3856
3857
    /**
3858
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3859
     *
3860
     * @param string $str <p>The input string.</p>
3861
     *
3862
     * @psalm-pure
3863
     *
3864
     * @return bool
3865
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3866
     */
3867 13
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3876
3877
    /**
3878
     * Check if the string contains any HTML tags.
3879
     *
3880
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3881
     *
3882
     * @param string $str <p>The input string.</p>
3883
     *
3884
     * @psalm-pure
3885
     *
3886
     * @return bool
3887
     *              <p>Whether or not $str contains html elements.</p>
3888
     */
3889 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded_str = self::emoji_encode($str);

        // Look for an opening or closing tag, with optional attributes.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        return \preg_match($tag_pattern, $encoded_str) === 1;
    }
3904
3905
    /**
3906
     * Check if $url is a correct URL.
3907
     *
3908
     * @param string $url
3909
     * @param bool   $disallow_localhost
3910
     *
3911
     * @psalm-pure
3912
     *
3913
     * @return bool
3914
     */
3915 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // In a URL an IPv6 literal is written in square brackets
                    // (RFC 3986), so the loopback address is listed in that
                    // form as well.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // Reject anything with ".localhost" after the scheme, e.g. "foo.localhost".
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        // NOTE: the pattern is not anchored at the end, so it only checks the
        // scheme and the first two host labels.
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is left to PHP's own URL validation.
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3960
3961
    /**
3962
     * Try to check if "$str" is a JSON-string.
3963
     *
3964
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3965
     *
3966
     * @param string $str                                    <p>The input string.</p>
3967
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3968
     *                                                       results.</p>
3969
     *
3970
     * @return bool
3971
     *              <p>Whether or not the $str is in JSON format.</p>
3972
     */
3973 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only accepted when the input itself spells "null"
        // (compared case-insensitively).
        $decoded_is_null = ($decoded === null);
        if ($decoded_is_null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject scalar results such as numbers or strings.
        $is_array_or_object = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid && !$is_array_or_object) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4001
4002
    /**
4003
     * @param string $str <p>The input string.</p>
4004
     *
4005
     * @psalm-pure
4006
     *
4007
     * @return bool
4008
     *              <p>Whether or not $str contains only lowercase chars.</p>
4009
     */
4010 8
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4019
4020
    /**
4021
     * Returns true if the string is serialized, false otherwise.
4022
     *
4023
     * @param string $str <p>The input string.</p>
4024
     *
4025
     * @psalm-pure
4026
     *
4027
     * @return bool
4028
     *              <p>Whether or not $str is serialized.</p>
4029
     */
4030 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // unserialize() returns false both on failure and for a serialized
        // boolean false, so that one literal is accepted up-front.
        if ($str === 'b:0;') {
            return true;
        }

        // Only the format is being tested here, so no class may be
        // instantiated: with "allowed_classes => false" serialized objects are
        // decoded as __PHP_Incomplete_Class instead of the real class.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4042
4043
    /**
4044
     * Returns true if the string contains only upper case chars, false
4045
     * otherwise.
4046
     *
4047
     * @param string $str <p>The input string.</p>
4048
     *
4049
     * @psalm-pure
4050
     *
4051
     * @return bool
4052
     *              <p>Whether or not $str contains only upper case characters.</p>
4053
     */
4054 8
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4063
4064
    /**
4065
     * Check if the string is UTF-16.
4066
     *
4067
     * EXAMPLE: <code>
4068
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
4069
     * //
4070
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
4071
     * //
4072
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
4073
     * </code>
4074
     *
4075
     * @param string $str                       <p>The input string.</p>
4076
     * @param bool   $check_if_string_is_binary
4077
     *
4078
     * @psalm-pure
4079
     *
4080
     * @return false|int
4081
     *                   <strong>false</strong> if it's not UTF-16,<br>
4082
     *                   <strong>1</strong> for UTF-16LE,<br>
4083
     *                   <strong>2</strong> for UTF-16BE
4084
     */
4085 22
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
4086
    {
4087
        // init
4088 22
        $str = (string) $str;
4089 22
        $str_chars = [];
4090
4091
        if (
4092 22
            $check_if_string_is_binary
4093
            &&
4094 22
            !self::is_binary($str, true)
4095
        ) {
4096 2
            return false;
4097
        }
4098
4099 22
        if (self::$SUPPORT['mbstring'] === false) {
4100
            /**
4101
             * @psalm-suppress ImpureFunctionCall - is is only a warning
4102
             */
4103 3
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
4104
        }
4105
4106 22
        $str = self::remove_bom($str);
4107
4108 22
        $maybe_utf16le = 0;
4109 22
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
4110 22
        if ($test) {
4111 15
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
4112 15
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
4113 15
            if ($test3 === $test) {
4114
                /**
4115
                 * @psalm-suppress RedundantCondition
4116
                 */
4117 15
                if ($str_chars === []) {
4118 15
                    $str_chars = self::count_chars($str, true, false);
4119
                }
4120 15
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
0 ignored issues
show
Bug introduced by
The expression self::count_chars($test3) cannot be used as a reference.

Let?s assume that you have the following foreach statement:

foreach ($array as &$itemValue) { }

$itemValue is assigned by reference. This is possible because the expression (in the example $array) can be used as a reference target.

However, if we were to replace $array with something different like the result of a function call as in

foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.

Available Fixes

1. Do not assign by reference
foreach (getArray() as $itemValue) { }
2. Assign to a local variable first
$array = getArray();
foreach ($array as &$itemValue) {}
3. Return a reference
function &getArray() { $array = array(); return $array; }

foreach (getArray() as &$itemValue) { }
Loading history...
4121 15
                    if (\in_array($test3char, $str_chars, true)) {
4122 15
                        ++$maybe_utf16le;
4123
                    }
4124
                }
4125 15
                unset($test3charEmpty);
4126
            }
4127
        }
4128
4129 22
        $maybe_utf16be = 0;
4130 22
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
4131 22
        if ($test) {
4132 15
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
4133 15
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
4134 15
            if ($test3 === $test) {
4135 15
                if ($str_chars === []) {
4136 7
                    $str_chars = self::count_chars($str, true, false);
4137
                }
4138 15
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
0 ignored issues
show
Bug introduced by
The expression self::count_chars($test3) cannot be used as a reference.

Let's assume that you have the following foreach statement:

foreach ($array as &$itemValue) { }

$itemValue is assigned by reference. This is possible because the expression (in the example $array) can be used as a reference target.

However, if we were to replace $array with something different like the result of a function call as in

foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.

Available Fixes

1. Do not assign by reference
foreach (getArray() as $itemValue) { }
2. Assign to a local variable first
$array = getArray();
foreach ($array as &$itemValue) {}
3. Return a reference
function &getArray() { $array = array(); return $array; }

foreach (getArray() as &$itemValue) { }
Loading history...
4139 15
                    if (\in_array($test3char, $str_chars, true)) {
4140 15
                        ++$maybe_utf16be;
4141
                    }
4142
                }
4143 15
                unset($test3charEmpty);
4144
            }
4145
        }
4146
4147 22
        if ($maybe_utf16be !== $maybe_utf16le) {
4148 7
            if ($maybe_utf16le > $maybe_utf16be) {
4149 5
                return 1;
4150
            }
4151
4152 6
            return 2;
4153
        }
4154
4155 18
        return false;
4156
    }
4157
4158
    /**
     * Check if the string is UTF-32.
     *
     * The check is a heuristic: after removing a BOM, the input is converted
     * from UTF-32LE (and then UTF-32BE) to UTF-8 and round-tripped once. If the
     * round trip is stable, the characters of the converted text are compared
     * against the character statistics of the input and every hit increases the
     * score of that byte order. The byte order with the higher score wins; equal
     * scores (including "both zero") mean "not UTF-32".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>If true, return false right away when
     *                                          self::is_binary($str, true) does not report
     *                                          the input as binary.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, evaluated in the order LE, BE
        $scores = [
            'UTF-32LE' => 0,
            'UTF-32BE' => 0,
        ];

        foreach (\array_keys($scores) as $byte_order) {
            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round trip, otherwise the guess is discarded
            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and only once for both byte orders
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys (the characters) are needed, so iterate over them by value
            // instead of taking a reference into a temporary array
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$byte_order];
                }
            }
        }

        $maybe_utf32le = $scores['UTF-32LE'];
        $maybe_utf32be = $scores['UTF-32BE'];

        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4251
4252
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the first element that
     * fails the check makes the whole call return false. An empty array is valid.
     * Everything else is cast to string and handed to self::is_utf8_string().
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // iterate by value: nothing is modified, so a reference is not needed
            // (and would be left dangling after the early return)
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4282
4283
    /**
     * Decodes a JSON string.
     *
     * The input is passed through self::filter() first and then handed to the
     * native \json_decode().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded (expected to be UTF-8).</p>
     * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.</p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, forwarded unchanged to
     *                        \json_decode(), e.g. <b>JSON_BIGINT_AS_STRING</b>.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable in self::$SUPPORT
     *
     * @return mixed
     *               <p>The decoded value in the appropriate PHP type; <b>NULL</b> is returned if the
     *               <i>json</i> cannot be decoded or if the encoded data is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4337
4338
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through self::filter() first and then handed to the
     * native \json_encode().
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource.</p>
     * @param int   $options [optional] <p>Bitmask of JSON encode constants (<b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged to \json_encode().</p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable in self::$SUPPORT
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4391
4392
    /**
     * Tells whether JSON support is available, detected via the existence
     * of the native "json_decode" function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4404
4405
    /**
     * Makes string's first char lowercase.
     *
     * For the plain 'UTF-8' case without language / length options the native
     * mb_* functions are used directly; every other combination is delegated to
     * self::strtolower() (and self::substr() for non UTF-8 encodings).
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // any encoding other than the literal 'UTF-8' goes through the generic helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        if ($lang === null && !$try_to_keep_the_string_length) {
            // no special casing requested: the native function is sufficient
            $head = \mb_strtolower($first_char);
        } else {
            $head = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $head . $tail;
    }
4467
4468
    /**
     * Deprecated alias of "UTF8::lcfirst()"; all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4499
4500
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split via self::str_to_words(); every non-empty part that is
     * not listed in $exceptions is passed through self::lcfirst() and the parts
     * are concatenated again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare against '' explicitly: the string "0" is falsy but not empty
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // by-value iteration: the words are only read, never modified
        foreach ($words as $word) {
            // skip empty parts only; a "0" word must be kept in the result
            if ((string) $word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4553
4554
    /**
     * Deprecated alias of "UTF8::lcfirst()"; all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4585
4586
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * When $chars is null or an empty string, leading whitespace ("\s") is removed;
     * otherwise the given characters are quoted and used as a character class.
     * With mbstring available the work is done by \mb_ereg_replace(), otherwise
     * by self::regex_replace().
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is a legitimate character list, so do not rely on truthiness here
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_chars) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4626
4627
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into a single string before the code
     * points are collected via self::codepoints().
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // no code points at all: nothing to pick from
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4653
4654
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes come from self::chr_size_list(); an empty list
     * yields 0.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4676
4677
    /**
     * Tells whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4689
4690
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into a single string before the code
     * points are collected via self::codepoints().
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // no code points at all: nothing to pick from
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4717
4718
    /**
     * Deprecated alias of "UTF8::normalize_encoding()"; both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4735
4736
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty input returns the fallback; a handful of frequent
     * names are answered directly; then a per-request static cache is consulted;
     * then the list of known encodings (self::$ENCODINGS, loaded lazily) is checked;
     * finally the upper-cased name, reduced to letters and digits, is looked up in
     * a table of aliases. Unknown names are returned upper-cased.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @psalm-param string|TNormalizeEncodingFallback $fallback
     * @psalm-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        if (!$encoding) {
            return $fallback;
        }

        // fast paths for the most frequent names (strict comparison on purpose)
        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
            return 'ISO-8859-1';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known encoding name: keep it exactly as given
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $normalized = \strtoupper($encoding);
        // alias lookup key: upper-cased name without any separators
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $normalized);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // every table value is a non-empty string, so isset() is sufficient here
        if (isset($equivalences[$alias_key])) {
            $normalized = $equivalences[$alias_key];
        }

        // cache under the name as it was passed in, not under the normalized one
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4901
4902
    /**
     * Standardize line ending to unix-like (or to any other given line ending).
     *
     * INFO: A string replacer is applied in one single pass, so a replacer which itself contains
     *       "\r" or "\n" (e.g. "\r\n") is never scanned and expanded a second time.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc\nd", "\r\n"); // "a\r\nb\r\nc\r\nd"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. An array is handed over to "str_replace()" unchanged, so its values are
     *                                  paired with "\r\n", "\r" and "\n" (in this order) and applied one after another.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // keep the historical "str_replace()" behavior for array replacers
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // "strtr()" prefers the longest key ("\r\n" before "\r") and never re-scans already replaced
        // text, a sequential "str_replace()" would turn "\r\n" into "\r\r\n\r\n" for the replacer "\r\n"
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4918
4919
    /**
     * Normalize some MS Word special characters.
     *
     * INFO: this is only a thin wrapper, the work is done by "ASCII::normalize_msword()"
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4935
4936
    /**
     * Normalize the whitespace.
     *
     * INFO: this is only a thin wrapper, the work is done by "ASCII::normalize_whitespace()"
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4962
4963
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * The lookup order is: per-request cache, the pre-built "ord" data table, "IntlChar::ord()"
     * (if available) and finally a manual decoding of the first (up to four) bytes.
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        $char = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache key is built from the original input + the (normalized) encoding
        $key = $char . '_' . $encoding;
        if (isset($CHAR_CACHE[$key])) {
            return $CHAR_CACHE[$key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$char])) {
            return $CHAR_CACHE[$key] = self::$ORD[$char];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($char);
            if ($intl_code) {
                return $CHAR_CACHE[$key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (up to four) byte values
        $bytes = \unpack('C*', (string) \substr($char, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $code_point = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $code_point = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $code_point = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing at all)
            $code_point = $lead;
        }

        return $CHAR_CACHE[$key] = $code_point;
    }
5057
5058
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        return $parsed !== false && $result !== [];
    }
5099
5100
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * INFO: an empty pattern with the "u" modifier is matched against an empty string,
     *       warnings are suppressed so that a missing support only results in "false"
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
5116
5117
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @throws \InvalidArgumentException <p>If $step is not a positive number.</p>
     * @throws \RuntimeException         <p>If $use_ctype is set, but "ext-ctype" is not installed.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An empty array is returned if one of the boundaries is empty or resolves to code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The "ctype_*" functions interpret integers as ASCII codes (deprecated since PHP 8.1),
        // so both boundaries are always checked via their string representation.
        $var1_as_string = (string) $var1;
        $var2_as_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit($var1_as_string) && \ctype_digit($var2_as_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit($var1_as_string) && \ctype_xdigit($var2_as_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_as_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_as_string);
        }

        if (!$start) {
            return [];
        }

        // the end boundary is resolved the same way the start boundary was resolved
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_as_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_as_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5211
5212
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: nothing in the string which could be an entity, a percent- or a "\u"-escape
        $needs_decoding = \strpos($str, '&') !== false
                          ||
                          \strpos($str, '%') !== false
                          ||
                          \strpos($str, '+') !== false
                          ||
                          \strpos($str, '\u') !== false;
        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $decoded = self::urldecode_unicode_helper($str);

        // one decoding round is always done, further rounds only if "$multi_decode" is set
        // and the previous round still changed the string
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $without_entities = self::html_entity_decode(
                self::to_utf8($previous),
                \ENT_QUOTES | \ENT_HTML5
            );

            $decoded = self::fix_simple_utf8(\rawurldecode($without_entities));
        } while ($multi_decode && $previous !== $decoded);

        return $decoded;
    }
5288
5289
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * INFO: the pattern is always used with the "u" (UTF-8) modifier
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is mapped to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5324
5325
    /**
     * alias for "UTF8::remove_bom()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of "UTF8::remove_bom()".</p>
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5341
5342
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * INFO: every entry of the internal BOM table is checked one after another,
     *       so more than one leading BOM can be removed in one call
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);

        foreach (self::$BOM as $bom => $bom_size) {
            // only a BOM at the very beginning of the string is of interest
            if (\strpos($str, $bom) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $bom_size, $remaining_bytes);
            if ($stripped === false) {
                return '';
            }

            $remaining_bytes -= (int) $bom_size;
            $str = (string) $stripped;
        }

        return $str;
    }
5377
5378
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * INFO: only directly consecutive repetitions are collapsed into one occurrence
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($what)) {
            return $str;
        }

        foreach ($what as $item) {
            // The first capture group always contains exactly one literal $item, so "$1" is used as
            // replacement. Using $item itself would interpret e.g. "$5" or "\0" as back-reference.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5408
5409
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: ''
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $plain_text = \strip_tags($str, $allowable_tags);

        return $plain_text;
    }
5426
5427
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * INFO: "\r\n" is handled as one single break and "<br ...>" is matched case-insensitive,
     *       with or without attributes / a closing slash. Other tags which only start with
     *       "br" (e.g. "<brand>") are kept.
     *
     * EXAMPLE: <code>UTF8::remove_html_breaks("foo<br />bar\r\nbaz", ' '); // 'foo bar baz'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // NOTE: "#" is the delimiter, so there must be no leading "/" in front of "\r\n",
        //       otherwise only the literal "/\r\n" would be matched by the first alternative.
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5442
5443
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * INFO: this is only a thin wrapper, the work is done by "ASCII::remove_invisible_characters()"
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        $visible_only = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $visible_only;
    }
5477
5478
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_left('κόσμε-äöü', 'κόσμε'); // '-äöü'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // NOTE: compare against '' explicitly, a truthiness check would also skip the valid prefix "0"
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5515
5516
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_right('κόσμε-äöü', 'äöü'); // 'κόσμε-'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // NOTE: compare against '' explicitly, a truthiness check would also skip the valid suffix "0"
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5554
5555
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * INFO: the case-sensitive variant uses "str_replace()",
     *       the case-insensitive variant uses "UTF8::str_ireplace()"
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5580
5581
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * INFO: the case-sensitive variant uses "str_replace()",
     *       the case-insensitive variant uses "UTF8::str_ireplace()"
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5606
5607
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * INFO: Invalid byte sequences are first converted into U+FFFD via "mb_convert_encoding()"
     *       (or dropped directly if the replacement is empty) and then every U+FFFD is
     *       replaced by $replacement_char, so the replacement can be any string.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // "mb_substitute_character()" only accepts "none", "long", "entity" or a code point,
            // an arbitrary string is rejected (ValueError in PHP >= 8, ignored with a warning before),
            // so we substitute with U+FFFD and let the "str_replace()" below do the real replacement
            $substitute_character = $replacement_char === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();
            \mb_substitute_character($substitute_character);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global setting, even if the conversion fails
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5665
5666
    /**
5667
     * Strip whitespace or other characters from the end of a UTF-8 string.
5668
     *
5669
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
5670
     *
5671
     * @param string      $str   <p>The string to be trimmed.</p>
5672
     * @param string|null $chars <p>Optional characters to be stripped.</p>
5673
     *
5674
     * @psalm-pure
5675
     *
5676
     * @return string
5677
     *                <p>A string with unwanted characters stripped from the right.</p>
5678
     */
5679 20
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check: the string "0" is falsy in PHP,
        // so "if ($chars)" silently ignored a request to strip zeros. An empty char list
        // still falls back to stripping whitespace, exactly as before.
        $has_custom_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_custom_chars) {
                // build a character class from the (escaped) chars, anchored at the end
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '[' . \preg_quote($chars) . ']+$';
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without "mbstring": same pattern, additionally escaped for the "/" delimiter
        if ($has_custom_chars) {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5707
5708
    /**
5709
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
5710
     *
5711
     * @param bool $useEcho
5712
     *
5713
     * @psalm-pure
5714
     *
5715
     * @return string|void
5716
     */
5717 2
    public static function showSupport(bool $useEcho = true)
    {
        // collect one "key - value" line per entry of the support table
        $html = '<pre>';

        // iterate by value: nothing is modified here, and a by-reference loop would
        // leave a dangling reference into the static support array
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        // the markup is returned in both cases, so callers can also capture it
        return $html;
    }
5735
5736
    /**
5737
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
5738
     *
5739
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
5740
     *
5741
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
5742
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
5743
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
5744
     *
5745
     * @psalm-pure
5746
     *
5747
     * @return string
5748
     *                <p>The HTML numbered entity for the given character.</p>
5749
     */
5750 2
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to encode
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched, but only if the caller asked for it
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);

        return $is_kept_ascii
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5769
5770
    /**
5771
     * @param string $str
5772
     * @param int    $tab_length
5773
     *
5774
     * @psalm-pure
5775
     *
5776
     * @return string
5777
     */
5778 5
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // a run of "$tab_length" spaces is what gets collapsed into one tab
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5790
5791
    /**
5792
     * alias for "UTF8::str_split()"
5793
     *
5794
     * @param int|string $str
5795
     * @param int        $length
5796
     * @param bool       $clean_utf8
5797
     *
5798
     * @psalm-pure
5799
     *
5800
     * @return string[]
5801
     *
5802
     * @see        UTF8::str_split()
5803
     * @deprecated <p>please use "UTF8::str_split()"</p>
5804
     */
5805 9
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        // deprecated alias: simply forwards to "str_split()"
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5813
5814
    /**
5815
     * alias for "UTF8::str_starts_with()"
5816
     *
5817
     * @param string $haystack
5818
     * @param string $needle
5819
     *
5820
     * @psalm-pure
5821
     *
5822
     * @return bool
5823
     *
5824
     * @see        UTF8::str_starts_with()
5825
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
5826
     */
5827 1
    public static function str_begins(string $haystack, string $needle): bool
    {
        // deprecated alias: simply forwards to "str_starts_with()"
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5831
5832
    /**
5833
     * Returns a camelCase version of the string. Trims surrounding spaces,
5834
     * capitalizes letters following digits, spaces, dashes and underscores,
5835
     * and removes spaces, dashes, as well as underscores.
5836
     *
5837
     * @param string      $str                           <p>The input string.</p>
5838
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
5839
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5840
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5841
     *                                                   tr</p>
5842
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
5843
     *                                                   -> ß</p>
5844
     *
5845
     * @psalm-pure
5846
     *
5847
     * @return string
5848
     */
5849 32
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the two well-known encoding names are used as-is, everything else is normalized
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character of the trimmed input ...
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        // ... and drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_*" functions are only used when no language specific handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case helper shared by both replace callbacks.
         *
         * @param string $part
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $part, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($part, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($part)
                : \mb_strtoupper($part, $encoding);
        };

        // step 1: remove separators (dash, underscore, whitespace) and upper-case the char that follows
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // step 2: upper-case every run of numbers together with the char that follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5925
5926
    /**
5927
     * Returns the string with the first letter of each word capitalized,
5928
     * except for when the word is a name which shouldn't be capitalized.
5929
     *
5930
     * @param string $str
5931
     *
5932
     * @psalm-pure
5933
     *
5934
     * @return string
5935
     *                <p>A string with $str capitalized.</p>
5936
     */
5937 1
    public static function str_capitalize_name(string $str): string
    {
        // first pass: work on the whitespace-collapsed input, word by word (split on spaces)
        $collapsed = self::collapse_whitespace($str);
        $by_space = self::str_capitalize_name_helper($collapsed, ' ');

        // second pass: the same treatment for the parts of hyphenated names
        return self::str_capitalize_name_helper($by_space, '-');
    }
5947
5948
    /**
5949
     * Returns true if the string contains $needle, false otherwise. By default
5950
     * the comparison is case-sensitive, but can be made insensitive by setting
5951
     * $case_sensitive to false.
5952
     *
5953
     * @param string $haystack       <p>The input string.</p>
5954
     * @param string $needle         <p>Substring to look for.</p>
5955
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5956
     *
5957
     * @psalm-pure
5958
     *
5959
     * @return bool whether or not $haystack contains $needle
5960
     */
5961 21
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // byte-wise search for the case-sensitive variant, multi-byte aware search otherwise
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5972
5973
    /**
5974
     * Returns true if the string contains all $needles, false otherwise. By
5975
     * default the comparison is case-sensitive, but can be made insensitive by
5976
     * setting $case_sensitive to false.
5977
     *
5978
     * @param string $haystack       <p>The input string.</p>
5979
     * @param array  $needles        <p>SubStrings to look for.</p>
5980
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5981
     *
5982
     * @psalm-pure
5983
     *
5984
     * @return bool whether or not $haystack contains all $needles
5985
     */
5986 45
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        // an empty haystack or an empty list of needles never counts as a match
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // iterate by value: the needles are only read, so no reference is needed
        foreach ($needles as $needle) {
            // compare as string, so that a needle like "0" (falsy in PHP) is searched
            // for instead of being rejected as "empty"
            $needle = (string) $needle;

            // an empty needle can not be "contained", so the whole check fails
            if ($needle === '') {
                return false;
            }

            // exactly one search per needle, matching the requested case-sensitivity
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6011
6012
    /**
6013
     * Returns true if the string contains any $needles, false otherwise. By
6014
     * default the comparison is case-sensitive, but can be made insensitive by
6015
     * setting $case_sensitive to false.
6016
     *
6017
     * @param string $haystack       <p>The input string.</p>
6018
     * @param array  $needles        <p>SubStrings to look for.</p>
6019
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6020
     *
6021
     * @psalm-pure
6022
     *
6023
     * @return bool
6024
     *              Whether or not $haystack contains any of the $needles
6025
     */
6026 46
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        // an empty haystack or an empty list of needles never counts as a match
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // iterate by value: the needles are only read, so no reference is needed
        foreach ($needles as $needle) {
            // compare as string, so that a needle like "0" (falsy in PHP) is searched
            // for instead of being skipped as "empty"
            $needle = (string) $needle;

            // empty needles are ignored
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit is enough
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6056
6057
    /**
6058
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
6059
     * inserted before uppercase characters (with the exception of the first
6060
     * character of the string), and in place of spaces as well as underscores.
6061
     *
6062
     * @param string $str      <p>The input string.</p>
6063
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6064
     *
6065
     * @psalm-pure
6066
     *
6067
     * @return string
6068
     */
6069 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // "dasherize" is "delimit" with a dash as the separator
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
6073
6074
    /**
6075
     * Returns a lowercase and trimmed string separated by the given delimiter.
6076
     * Delimiters are inserted before uppercase characters (with the exception
6077
     * of the first character of the string), and in place of spaces, dashes,
6078
     * and underscores. Alpha delimiters are not converted to lowercase.
6079
     *
6080
     * @param string      $str                           <p>The input string.</p>
6081
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
6082
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
6083
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
6084
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
6085
     *                                                   tr</p>
6086
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
6087
     *                                                   ß</p>
6088
     *
6089
     * @psalm-pure
6090
     *
6091
     * @return string
6092
     */
6093 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // both code paths do the same three steps, they only differ in the regex engine
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: put a dash in front of every upper-case letter that is not at a word boundary
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lower-case the string, via the plain "mb_" function whenever
        // no language specific handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        $str = $use_mb_functions && $encoding === 'UTF-8'
            ? \mb_strtolower($str)
            : self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        // step 3: collapse every run of dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6127
6128
    /**
6129
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
6130
     *
6131
     * EXAMPLE: <code>
6132
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
6133
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
6134
     * </code>
6135
     *
6136
     * @param string $str <p>The input string.</p>
6137
     *
6138
     * @psalm-pure
6139
     *
6140
     * @return false|string
6141
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
6142
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
6143
     */
6144 31
    public static function str_detect_encoding($str)
    {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        //
        // Step 1: binary looking input (UTF-16 / UTF-32 / PDF / images / ...)
        //

        if (self::is_binary($str, true)) {
            // result 1 is reported as little-endian, result 2 as big-endian
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_result === 2) {
                return 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_result === 2) {
                return 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // Step 2: plain ASCII
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // Step 3: valid UTF-8
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // Step 4: ask "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        $candidate_encodings = [
            'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
            'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
            'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
            'WINDOWS-1251', 'WINDOWS-1252', 'WINDOWS-1254',
            'CP932', 'CP936', 'CP950', 'CP866', 'CP850',
            'CP51932', 'CP50220', 'CP50221', 'CP50222',
            'ISO-2022-JP', 'ISO-2022-KR',
            'JIS', 'JIS-ms',
            'EUC-CN', 'EUC-JP',
        ];

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // Step 5: try a round-trip through "iconv()" for every known encoding
        //

        if (self::$ENCODINGS === null) {
            // load the list of encodings only once
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $encoding_candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($encoding_candidate, $encoding_candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $encoding_candidate;
            }
        }

        // nothing matched
        return false;
    }
6256
6257
    /**
6258
     * alias for "UTF8::str_ends_with()"
6259
     *
6260
     * @param string $haystack
6261
     * @param string $needle
6262
     *
6263
     * @psalm-pure
6264
     *
6265
     * @return bool
6266
     *
6267
     * @see        UTF8::str_ends_with()
6268
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
6269
     */
6270 1
    public static function str_ends(string $haystack, string $needle): bool
    {
        // deprecated alias: simply forwards to "str_ends_with()"
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6274
6275
    /**
6276
     * Check if the string ends with the given substring.
6277
     *
6278
     * EXAMPLE: <code>
6279
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6280
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
6281
     * </code>
6282
     *
6283
     * @param string $haystack <p>The string to search in.</p>
6284
     * @param string $needle   <p>The substring to search for.</p>
6285
     *
6286
     * @psalm-pure
6287
     *
6288
     * @return bool
6289
     */
6290 9
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        // an empty string can not end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        // byte-wise comparison of the haystack's tail with the needle
        $needle_length = \strlen($needle);
        $tail = \substr($haystack, -$needle_length);

        return $tail === $needle;
    }
6302
6303
    /**
6304
     * Returns true if the string ends with any of $substrings, false otherwise.
6305
     *
6306
     * - case-sensitive
6307
     *
6308
     * @param string   $str        <p>The input string.</p>
6309
     * @param string[] $substrings <p>Substrings to look for.</p>
6310
     *
6311
     * @psalm-pure
6312
     *
6313
     * @return bool whether or not $str ends with $substring
6314
     */
6315 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // iterate by value: the substrings are only read, so no reference is needed
        foreach ($substrings as $substring) {
            // Every string ends with the empty string. This needs an explicit check,
            // because "substr($str, -0)" returns the whole string instead of an empty tail.
            if ($substring === '') {
                return true;
            }

            // byte-wise (case-sensitive) comparison of the tail with the substring
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
6329
6330
    /**
6331
     * Ensures that the string begins with $substring. If it doesn't, it's
6332
     * prepended.
6333
     *
6334
     * @param string $str       <p>The input string.</p>
6335
     * @param string $substring <p>The substring to add if not present.</p>
6336
     *
6337
     * @psalm-pure
6338
     *
6339
     * @return string
6340
     */
6341 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        // compare only the leading bytes of the string with the (non-empty) substring
        $has_prefix = $substring !== ''
            && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $has_prefix ? $str : $substring . $str;
    }
6353
6354
    /**
6355
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
6356
     *
6357
     * @param string $str       <p>The input string.</p>
6358
     * @param string $substring <p>The substring to add if not present.</p>
6359
     *
6360
     * @psalm-pure
6361
     *
6362
     * @return string
6363
     */
6364 10
    public static function str_ensure_right(string $str, string $substring): string
    {
        // the string is left untouched only if it already ends with the (non-empty) substring
        $has_suffix = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        return $has_suffix ? $str : $str . $substring;
    }
6378
6379
    /**
6380
     * Capitalizes the first word of the string, replaces underscores with
6381
     * spaces, and strips '_id'.
6382
     *
6383
     * @param string $str
6384
     *
6385
     * @psalm-pure
6386
     *
6387
     * @return string
6388
     */
6389 3
    public static function str_humanize($str): string
    {
        // order matters: "_id" has to be removed before the remaining underscores become spaces
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        // trim the result and upper-case its first character
        return self::ucfirst(\trim($with_spaces));
    }
6405
6406
    /**
6407
     * alias for "UTF8::str_istarts_with()"
6408
     *
6409
     * @param string $haystack
6410
     * @param string $needle
6411
     *
6412
     * @psalm-pure
6413
     *
6414
     * @return bool
6415
     *
6416
     * @see        UTF8::str_istarts_with()
6417
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
6418
     */
6419 1
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // deprecated alias: simply forwards to "str_istarts_with()"
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
6423
6424
    /**
6425
     * alias for "UTF8::str_iends_with()"
6426
     *
6427
     * @param string $haystack
6428
     * @param string $needle
6429
     *
6430
     * @psalm-pure
6431
     *
6432
     * @return bool
6433
     *
6434
     * @see        UTF8::str_iends_with()
6435
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
6436
     */
6437 1
    public static function str_iends(string $haystack, string $needle): bool
    {
        // deprecated alias: simply forwards to "str_iends_with()"
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
6441
6442
    /**
6443
     * Check if the string ends with the given substring, case-insensitive.
6444
     *
6445
     * EXAMPLE: <code>
6446
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6447
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6448
     * </code>
6449
     *
6450
     * @param string $haystack <p>The string to search in.</p>
6451
     * @param string $needle   <p>The substring to search for.</p>
6452
     *
6453
     * @psalm-pure
6454
     *
6455
     * @return bool
6456
     */
6457 12
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        // an empty string can not end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        // cut off as many bytes as the needle has and compare both parts case-insensitively
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6469
6470
    /**
6471
     * Returns true if the string ends with any of $substrings, false otherwise.
6472
     *
6473
     * - case-insensitive
6474
     *
6475
     * @param string   $str        <p>The input string.</p>
6476
     * @param string[] $substrings <p>Substrings to look for.</p>
6477
     *
6478
     * @psalm-pure
6479
     *
6480
     * @return bool
6481
     *              <p>Whether or not $str ends with $substring.</p>
6482
     */
6483 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // iterate by value: the substrings are only read, and a by-reference loop
        // would leave a dangling reference behind
        foreach ($substrings as $substring) {
            // the first case-insensitive match is enough
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6497
6498
    /**
6499
     * Returns the index of the first occurrence of $needle in the string,
6500
     * and false if not found. Accepts an optional offset from which to begin
6501
     * the search.
6502
     *
6503
     * @param string $str      <p>The input string.</p>
6504
     * @param string $needle   <p>Substring to look for.</p>
6505
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6506
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6507
     *
6508
     * @psalm-pure
6509
     *
6510
     * @return false|int
6511
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6512
     *
6513
     * @see        UTF8::stripos()
6514
     * @deprecated <p>please use "UTF8::stripos()"</p>
6515
     */
6516 1
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // deprecated alias: simply forwards to "stripos()"
        return self::stripos($str, $needle, $offset, $encoding);
    }
6529
6530
    /**
6531
     * Returns the index of the last occurrence of $needle in the string,
6532
     * and false if not found. Accepts an optional offset from which to begin
6533
     * the search. Offsets may be negative to count from the last character
6534
     * in the string.
6535
     *
6536
     * @param string $str      <p>The input string.</p>
6537
     * @param string $needle   <p>Substring to look for.</p>
6538
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6539
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6540
     *
6541
     * @psalm-pure
6542
     *
6543
     * @return false|int
6544
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6545
     *
6546
     * @see        UTF8::strripos()
6547
     * @deprecated <p>please use "UTF8::strripos()"</p>
6548
     */
6549 10
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: the case-insensitive reverse lookup is done by UTF8::strripos().
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
6562
6563
    /**
6564
     * Returns the index of the first occurrence of $needle in the string,
6565
     * and false if not found. Accepts an optional offset from which to begin
6566
     * the search.
6567
     *
6568
     * @param string $str      <p>The input string.</p>
6569
     * @param string $needle   <p>Substring to look for.</p>
6570
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6571
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6572
     *
6573
     * @psalm-pure
6574
     *
6575
     * @return false|int
6576
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6577
     *
6578
     * @see        UTF8::strpos()
6579
     * @deprecated <p>please use "UTF8::strpos()"</p>
6580
     */
6581 11
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: the lookup is done by UTF8::strpos().
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
6594
6595
    /**
6596
     * Returns the index of the last occurrence of $needle in the string,
6597
     * and false if not found. Accepts an optional offset from which to begin
6598
     * the search. Offsets may be negative to count from the last character
6599
     * in the string.
6600
     *
6601
     * @param string $str      <p>The input string.</p>
6602
     * @param string $needle   <p>Substring to look for.</p>
6603
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6604
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6605
     *
6606
     * @psalm-pure
6607
     *
6608
     * @return false|int
6609
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6610
     *
6611
     * @see        UTF8::strrpos()
6612
     * @deprecated <p>please use "UTF8::strrpos()"</p>
6613
     */
6614 10
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: the reverse lookup is done by UTF8::strrpos().
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
6627
6628
    /**
6629
     * Inserts $substring into the string at the $index provided.
6630
     *
6631
     * @param string $str       <p>The input string.</p>
6632
     * @param string $substring <p>String to be inserted.</p>
6633
     * @param int    $index     <p>The index at which to insert the substring.</p>
6634
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6635
     *
6636
     * @psalm-pure
6637
     *
6638
     * @return string
6639
     */
6640 8
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // The fast path (plain mb_* calls) is only taken for the literal 'UTF-8';
        // every other value is normalized and routed through the class helpers.
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $use_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // An index behind the last character leaves the input untouched.
        if ($index > $char_count) {
            return $str;
        }

        if ($use_mb) {
            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $char_count);
        } else {
            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $char_count, $encoding);
        }

        return $head . $substring . $tail;
    }
6669
6670
    /**
6671
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6672
     *
6673
     * EXAMPLE: <code>
6674
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6675
     * </code>
6676
     *
6677
     * @see http://php.net/manual/en/function.str-ireplace.php
6678
     *
6679
     * @param string|string[] $search      <p>
6680
     *                                     Every replacement with search array is
6681
     *                                     performed on the result of previous replacement.
6682
     *                                     </p>
6683
     * @param string|string[] $replacement <p>The replacement.</p>
6684
     * @param string|string[] $subject     <p>
6685
     *                                     If subject is an array, then the search and
6686
     *                                     replace is performed with every entry of
6687
     *                                     subject, and the return value is an array as
6688
     *                                     well.
6689
     *                                     </p>
6690
     * @param int             $count       [optional] <p>
6691
     *                                     The number of matched and replaced needles will
6692
     *                                     be returned in count which is passed by
6693
     *                                     reference.
6694
     *                                     </p>
6695
     *
6696
     * @psalm-pure
6697
     *
6698
     * @return string|string[] a string or an array of replacements
6699
     *
6700
     * @template TStrIReplaceSubject
6701
     * @psalm-param TStrIReplaceSubject $subject
6702
     * @psalm-return TStrIReplaceSubject
6703
     */
6704 29
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Turn every search term into its own case-insensitive, unicode-aware regex.
        $patterns = \array_map(
            static function ($term): string {
                $term = (string) $term;

                // An empty term must not match anything: "^" followed by a
                // look-behind for one character can never be satisfied.
                return $term === ''
                    ? '/^(?<=.)$/'
                    : '/' . \preg_quote($term, '/') . '/ui';
            },
            (array) $search
        );

        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6726
6727
    /**
6728
     * Replaces $search from the beginning of string with $replacement.
6729
     *
6730
     * @param string $str         <p>The input string.</p>
6731
     * @param string $search      <p>The string to search for.</p>
6732
     * @param string $replacement <p>The replacement.</p>
6733
     *
6734
     * @psalm-pure
6735
     *
6736
     * @return string string after the replacements
6737
     */
6738 17
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Historic behavior, kept for backward compatibility: an empty search
        // appends the replacement. This also covers the empty-input cases
        // ('' . $replacement), so no separate "$str === ''" branch is needed.
        if ($search === '') {
            return $str . $replacement;
        }

        // Multibyte-aware comparison: the byte-wise stripos() only folds ASCII
        // letters, so e.g. "ÄRGER" would never match the search "ärger".
        if (\mb_stripos($str, $search, 0, 'UTF-8') === 0) {
            // Cut by character count, not by byte count of the search term.
            return $replacement . \mb_substr($str, (int) \mb_strlen($search, 'UTF-8'), null, 'UTF-8');
        }

        return $str;
    }
6760
6761
    /**
6762
     * Replaces $search from the ending of string with $replacement.
6763
     *
6764
     * @param string $str         <p>The input string.</p>
6765
     * @param string $search      <p>The string to search for.</p>
6766
     * @param string $replacement <p>The replacement.</p>
6767
     *
6768
     * @psalm-pure
6769
     *
6770
     * @return string
6771
     *                <p>string after the replacements.</p>
6772
     */
6773 17
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Historic behavior, kept for backward compatibility: an empty search
        // appends the replacement (this also covers an empty input string).
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = (int) \mb_strlen($str, 'UTF-8');
        $search_length = (int) \mb_strlen($search, 'UTF-8');

        // A search term longer than the input can never be its ending. Bailing
        // out here also avoids an out-of-range negative offset, which makes the
        // string search functions throw a ValueError on PHP 8.
        if ($search_length > $str_length) {
            return $str;
        }

        $tail_start = $str_length - $search_length;

        // Multibyte-aware comparison: the byte-wise stripos() only folds ASCII
        // letters. Starting the search at $tail_start leaves exactly
        // $search_length characters, so a hit means the string ends with $search.
        if (\mb_stripos($str, $search, $tail_start, 'UTF-8') !== false) {
            return \mb_substr($str, 0, $tail_start, 'UTF-8') . $replacement;
        }

        return $str;
    }
6795
6796
    /**
6797
     * Check if the string starts with the given substring, case-insensitive.
6798
     *
6799
     * EXAMPLE: <code>
6800
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6801
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6802
     * </code>
6803
     *
6804
     * @param string $haystack <p>The string to search in.</p>
6805
     * @param string $needle   <p>The substring to search for.</p>
6806
     *
6807
     * @psalm-pure
6808
     *
6809
     * @return bool
6810
     */
6811 13
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // Every string (even an empty one) starts with the empty needle.
        if ($needle === '') {
            return true;
        }

        // An empty haystack cannot start with a non-empty needle; otherwise the
        // needle has to be found case-insensitively at position 0.
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6823
6824
    /**
6825
     * Returns true if the string begins with any of $substrings, false otherwise.
6826
     *
6827
     * - case-insensitive
6828
     *
6829
     * @param string $str        <p>The input string.</p>
6830
     * @param array  $substrings <p>Substrings to look for.</p>
6831
     *
6832
     * @psalm-pure
6833
     *
6834
     * @return bool whether or not $str starts with $substring
6835
     */
6836 5
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // Nothing to compare: an empty input or an empty candidate list never matches.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: the candidates are only read, so a by-reference loop
        // would just force a copy of the array and leave a dangling reference.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6854
6855
    /**
6856
     * Gets the substring after the first occurrence of a separator.
6857
     *
6858
     * @param string $str       <p>The input string.</p>
6859
     * @param string $separator <p>The string separator.</p>
6860
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6861
     *
6862
     * @psalm-pure
6863
     *
6864
     * @return string
6865
     */
6866 1
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so the
        // character offset and the substring call agree with each other.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            // separator not found
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: skip the separator itself and return the rest
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6894
6895
    /**
6896
     * Gets the substring after the last occurrence of a separator.
6897
     *
6898
     * @param string $str       <p>The input string.</p>
6899
     * @param string $separator <p>The string separator.</p>
6900
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6901
     *
6902
     * @psalm-pure
6903
     *
6904
     * @return string
6905
     */
6906 1
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so the
        // character offset and the substring call agree with each other.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            // separator not found
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: skip the separator itself and return the rest
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6934
6935
    /**
6936
     * Gets the substring before the first occurrence of a separator.
6937
     *
6938
     * @param string $str       <p>The input string.</p>
6939
     * @param string $separator <p>The string separator.</p>
6940
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6941
     *
6942
     * @psalm-pure
6943
     *
6944
     * @return string
6945
     */
6946 1
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so the
        // character offset and the substring call agree with each other.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            // separator not found
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: everything in front of the separator
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6966
6967
    /**
6968
     * Gets the substring before the last occurrence of a separator.
6969
     *
6970
     * @param string $str       <p>The input string.</p>
6971
     * @param string $separator <p>The string separator.</p>
6972
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6973
     *
6974
     * @psalm-pure
6975
     *
6976
     * @return string
6977
     */
6978 1
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // 'UTF-8' uses the plain mb_* functions, everything else the class helpers.
        $use_mb = $encoding === 'UTF-8';

        // Character position of the last case-insensitive occurrence.
        $offset = $use_mb
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            // separator not found
            return '';
        }

        if ($use_mb) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7003
7004
    /**
7005
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
7006
     *
7007
     * @param string $str           <p>The input string.</p>
7008
     * @param string $needle        <p>The string to look for.</p>
7009
     * @param bool   $before_needle [optional] <p>Default: false</p>
7010
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7011
     *
7012
     * @psalm-pure
7013
     *
7014
     * @return string
7015
     */
7016 2
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        // UTF8::stristr() reports "not found" as false; this method maps it to ''.
        $part = self::stristr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
7042
7043
    /**
7044
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
7045
     *
7046
     * @param string $str           <p>The input string.</p>
7047
     * @param string $needle        <p>The string to look for.</p>
7048
     * @param bool   $before_needle [optional] <p>Default: false</p>
7049
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7050
     *
7051
     * @psalm-pure
7052
     *
7053
     * @return string
7054
     */
7055 1
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        // UTF8::strrichr() reports "not found" as false; this method maps it to ''.
        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
7081
7082
    /**
7083
     * Returns the last $n characters of the string.
7084
     *
7085
     * @param string $str      <p>The input string.</p>
7086
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
7087
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7088
     *
7089
     * @psalm-pure
7090
     *
7091
     * @return string
7092
     */
7093 12
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to return for an empty input or a non-positive count.
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // A negative start offset counts from the end of the string.
            return (string) self::substr(
                $str,
                -$n,
                null,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        return (string) \mb_substr($str, -$n);
    }
7110
7111
    /**
7112
     * Limit the number of characters in a string.
7113
     *
7114
     * @param string $str        <p>The input string.</p>
7115
     * @param int    $length     [optional] <p>Default: 100</p>
7116
     * @param string $str_add_on [optional] <p>Default: …</p>
7117
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
7118
     *
7119
     * @psalm-pure
7120
     *
7121
     * @return string
7122
     */
7123 2
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough: return the input unchanged (no add-on).
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on. Clamp at 0: a negative length would
            // make mb_substr() trim from the END of the string instead, which
            // returned a result longer than the input when the add-on is longer
            // than $length.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $keep) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Same clamping as in the UTF-8 fast path above.
        $keep = \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7150
7151
    /**
7152
     * Limit the number of characters in a string, cutting at a word boundary (space) instead of mid-word where possible.
7153
     *
7154
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
7155
     *
7156
     * @param string $str        <p>The input string.</p>
7157
     * @param int    $length     [optional] <p>Default: 100</p>
7158
     * @param string $str_add_on [optional] <p>Default: …</p>
7159
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
7160
     *
7161
     * @psalm-pure
7162
     *
7163
     * @return string
7164
     */
7165 6
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        // 'UTF-8' uses the plain mb_* functions, everything else the class helpers.
        $use_mb = $encoding === 'UTF-8';

        // Short enough: return the input unchanged (no add-on).
        $total = $use_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($total <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right in front of it.
        $boundary_char = $use_mb
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);
        if ($boundary_char === ' ') {
            $head = $use_mb
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        $truncated = $use_mb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);

        // Only the helper-based path treats a failed substring as "add-on only".
        if (!$use_mb && $truncated === false) {
            return '' . $str_add_on;
        }

        // A negative explode() limit drops the last piece, i.e. the word that
        // was (possibly) cut in half by the truncation above.
        $whole_words = \implode(' ', \explode(' ', $truncated, -1));

        if ($whole_words === '') {
            // No space inside the limit: fall back to a hard cut with room for one char.
            $head = $use_mb
                ? \mb_substr($truncated, 0, $length - 1)
                : self::substr($truncated, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
7219
7220
    /**
7221
     * Returns the longest common prefix between the $str1 and $str2.
7222
     *
7223
     * @param string $str1     <p>The input string.</p>
7224
     * @param string $str2     <p>Second string for comparison.</p>
7225
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7226
     *
7227
     * @psalm-pure
7228
     *
7229
     * @return string
7230
     */
7231 10
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        // 'UTF-8' uses the plain mb_* functions, everything else is normalized
        // first and then handled by the class helpers.
        $use_mb = $encoding === 'UTF-8';
        if ($use_mb) {
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        // Walk both strings from the left until the characters differ.
        $prefix = '';
        $pos = 0;
        while ($pos < $limit) {
            $left = $use_mb
                ? \mb_substr($str1, $pos, 1)
                : self::substr($str1, $pos, 1, $encoding);
            if ($left === false) {
                break;
            }

            $right = $use_mb
                ? \mb_substr($str2, $pos, 1)
                : self::substr($str2, $pos, 1, $encoding);
            if ($left !== $right) {
                break;
            }

            $prefix .= $left;
            ++$pos;
        }

        return $prefix;
    }
7283
7284
    /**
7285
     * Returns the longest common substring between the $str1 and $str2.
7286
     * In the case of ties, it returns that which occurs first.
7287
     *
7288
     * @param string $str1
7289
     * @param string $str2     <p>Second string for comparison.</p>
7290
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7291
     *
7292
     * @psalm-pure
7293
     *
7294
     * @return string
7295
     *                <p>The longest common substring of $str1 and $str2 (empty string if there is none).</p>
7296
     */
7297 11
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated AFTER the normalization above, so an alias that normalizes
        // to 'UTF-8' takes the plain mb_* path as well.
        $use_mb = $encoding === 'UTF-8';

        // Split $str2 into characters once instead of once per table cell.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the longest common run found so far
        $end = 0; // 1-based position in $str1 where that run ends

        // Only the previous row of the DP table is ever read, so two rolling
        // rows are enough. Index $j holds the length of the common run ending
        // at $str1[$i - 1] / $str2[$j - 1]; index 0 is the zero border.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            // The character of $str1 is invariant for the whole inner loop.
            $str_char = $use_mb
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];
            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    // Strict ">" keeps the first occurrence on ties.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7374
7375
    /**
7376
     * Returns the longest common suffix between the $str1 and $str2.
7377
     *
7378
     * @param string $str1
7379
     * @param string $str2     <p>Second string for comparison.</p>
7380
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7381
     *
7382
     * @psalm-pure
7383
     *
7384
     * @return string
7385
     */
7386 10
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // 'UTF-8' uses the plain mb_* functions, everything else is normalized
        // first and then handled by the class helpers.
        $use_mb = $encoding === 'UTF-8';
        if ($use_mb) {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        // Walk both strings from the right (negative offsets count from the
        // end) until the characters differ.
        $suffix = '';
        $distance = 1;
        while ($distance <= $limit) {
            $left = $use_mb
                ? \mb_substr($str1, -$distance, 1)
                : self::substr($str1, -$distance, 1, $encoding);
            if ($left === false) {
                break;
            }

            $right = $use_mb
                ? \mb_substr($str2, -$distance, 1)
                : self::substr($str2, -$distance, 1, $encoding);
            if ($left !== $right) {
                break;
            }

            $suffix = $left . $suffix;
            ++$distance;
        }

        return $suffix;
    }
7441
7442
    /**
     * Checks whether $str matches the supplied regular expression.
     *
     * INFO: The pattern is wrapped into "/" delimiters and gets the "u" (UTF-8) modifier appended,
     *       so it has to be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>True if the pattern matches, false if it does not match or the regex fails.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
7456
7457
    /**
     * Tells whether a character exists at the given offset. A negative offset
     * counts from the last character of the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>True if the offset addresses an existing character, false otherwise.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // negative offsets: -1 is the last character, -$char_count the first one
        return $offset < 0
            ? \abs($offset) <= $char_count
            : $offset < $char_count;
    }
7481
7482
    /**
     * Returns the character at the given index. A negative index counts from
     * the last character of the string. Implements part of the ArrayAccess
     * interface and throws an OutOfBoundsException if the index does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must measure the string with the same encoding that is
        // used to read the character, hence $encoding is passed along here.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7514
7515
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * INFO: The input is returned unchanged if $pad_length is 0, if $pad_string is empty
     *       or if $pad_length is smaller than the character length of $str.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // the plain "mb_*" fast path is only taken for the exact string 'UTF-8'
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        // number of characters that are missing to reach $pad_length
        $diff = $pad_length - $str_length;

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $left_length = $diff;
                $right_length = 0;

                break;

            case \STR_PAD_BOTH:
                // with an odd difference the extra character goes to the right side
                $left_length = (int) \floor($diff / 2);
                $right_length = $diff - $left_length;

                break;

            case \STR_PAD_RIGHT:
            default:
                $left_length = 0;
                $right_length = $diff;
        }

        return self::str_pad_build_padding($pad_string, $left_length, $is_utf8, $encoding)
               . $str
               . self::str_pad_build_padding($pad_string, $right_length, $is_utf8, $encoding);
    }

    /**
     * Builds a padding of exactly $length characters by repeating $pad_string.
     *
     * @param string $pad_string <p>The (non-empty) string that is repeated.</p>
     * @param int    $length     <p>Number of padding characters to produce.</p>
     * @param bool   $is_utf8    <p>True to use the plain "mb_*" functions, false to use the helpers of this class.</p>
     * @param string $encoding   <p>The (already normalized) encoding, only used if $is_utf8 is false.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function str_pad_build_padding(
        string $pad_string,
        int $length,
        bool $is_utf8,
        string $encoding
    ): string {
        if ($length <= 0) {
            return '';
        }

        $ps_length = $is_utf8
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);

        // Repeat only as often as needed to cover $length characters; max() guards
        // against a division by zero if the length could not be determined.
        $repeated = \str_repeat($pad_string, (int) \ceil($length / \max(1, $ps_length)));

        return $is_utf8
            ? (string) \mb_substr($repeated, 0, $length)
            : (string) self::substr($repeated, 0, $length, $encoding);
    }
7682
7683
    /**
     * Pads the string on both sides until it has the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_BOTH.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7711
7712
    /**
     * Pads the beginning of the string until it has the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_LEFT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7740
7741
    /**
     * Pads the end of the string until it has the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_RIGHT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7769
7770
    /**
     * Repeat a string.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * INFO: The input is passed through "UTF8::filter()" before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7799
7800
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int             $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @psalm-pure
     *
     * @return string|string[] this function returns a string or an array with the replaced values
     *
     * @template TStrReplaceSubject
     * @psalm-param TStrReplaceSubject $subject
     * @psalm-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7856
7857
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * INFO: The comparison is done byte-wise. If $search is empty, $replacement is
     *       appended to the end of $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // also covers an empty $str, where this yields just $replacement
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // only the leading bytes matter, so compare just those
        if (\strncmp($str, $search, $search_length) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $search_length);
    }
7894
7895
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * INFO: The comparison is done byte-wise. If $search is empty, $replacement is
     *       appended to the end of $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // also covers an empty $str, where this yields just $replacement
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // A needle longer than the haystack can never match. Bail out early instead of
        // computing an out-of-range offset, which makes strpos() throw a ValueError on
        // PHP 8 (and emit a warning on PHP 7).
        if ($search_length > $str_length) {
            return $str;
        }

        $tail_start = $str_length - $search_length;

        if (\substr($str, $tail_start) === $search) {
            return \substr($str, 0, $tail_start) . $replacement;
        }

        return $str;
    }
7932
7933
    /**
     * Replaces the first occurrence of "$search" in "$subject" with "$replace".
     * The subject is returned unchanged if the search term is not found.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, (int) self::strlen($search));
    }
7967
7968
    /**
     * Replaces the last occurrence of "$search" in "$subject" with "$replace".
     * The subject is returned unchanged if the search term is not found.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, (int) self::strlen($search));
    }
8001
8002
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses "shuffle()", a random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // the plain "mb_*" fast path is only taken for the exact string 'UTF-8'
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // shuffle the character positions, then rebuild the string in that order
        $positions = \range(0, $char_count - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $result = '';
        foreach ($positions as $position) {
            $char = $is_utf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
8051
8052
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start or $end are computed
     * from the end of the string.
     *
     * INFO: If an explicit $end does not lie behind $start, an empty string is returned.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> may be returned by the underlying substr function.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // the plain "mb_*" fast path is only taken for the exact string 'UTF-8'
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // no end given: take everything from $start on
            $length = $str_length;
        } else {
            // Resolve negative indexes to absolute positions first, otherwise the
            // length computed below is wrong for a negative $start or for an $end
            // that lies before $start.
            $start_index = $start < 0 ? \max(0, $str_length + $start) : $start;
            $end_index = $end < 0 ? $str_length + $end : $end;

            $length = $end_index - $start_index;
            if ($length <= 0) {
                return '';
            }
        }

        return $is_utf8
            ? \mb_substr($str, $start, $length)
            : self::substr($str, $start, $length, $encoding);
    }
8103
8104
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * INFO: Dashes and whitespace become "_", every uppercase letter is lower-cased and
     *       prefixed with "_", plain digits are wrapped in "_" and repeated "_" are merged.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Numbers and uppercase letters only: inside a character class "|" is a
            // literal pipe and not an alternation, so it must not be listed here.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // a plain (ASCII) digit is separated on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8173
8174
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * INFO: The string is converted via "UTF8::codepoints()", sorted and converted
     *       back via "UTF8::string()".
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicated values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8204
8205
    /**
     * Splits every entry of an array into an array of Unicode character chunks.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * INFO: Each entry is passed to "UTF8::str_split()"; the keys of the input are kept.
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $chunks = [];

        foreach ($input as $key => $value) {
            $chunks[$key] = self::str_split($value, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        /** @var string[][] $chunks */
        return $chunks;
    }
8243
8244
    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element. A value <= 0 yields an empty array.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback: arrays are delegated to str_split_array()
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        // init
        $input = (string) $input;

        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if (
            $try_to_use_mb_functions
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            if (Bootup::is_php('7.4')) {
                // native splitter already honours $length, so return it directly
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $return = \mb_str_split($input, $length);
                if ($return !== false) {
                    return $return;
                }
            }

            $i_max = \mb_strlen($input);
            if ($i_max <= 127) {
                // short strings: one mb_substr() call per character
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($input, $i, 1);
                }
            } else {
                // longer strings: a single PCRE pass over all characters
                $return_array = [];
                \preg_match_all('/./us', $input, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $input, $return_array);
            $ret = $return_array[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand

            $ret = [];
            $len = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($input[$i] & "\x80") === "\x00") {
                    // high bit clear: single-byte character
                    $ret[] = $input[$i];
                } elseif (
                    isset($input[$i + 1])
                    &&
                    ($input[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx lead byte: expects one 10xxxxxx continuation byte
                    if (($input[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $input[$i] . $input[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($input[$i + 2])
                    &&
                    ($input[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx lead byte: expects two continuation bytes
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($input[$i + 3])
                    &&
                    ($input[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx lead byte: expects three continuation bytes
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];

                        $i += 3;
                    }
                }
                // bytes that match none of the patterns above are skipped
            }
        }

        if ($length > 1) {
            // Group the single characters into chunks of $length characters.
            // The callback takes its argument by value: array_map() passes
            // values, and a by-reference parameter triggers a warning on PHP 8.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
8404
8405
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; an empty array if $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports failure with false; mirror the preg branch
            // below instead of iterating / returning a non-array.
            if ($parts === false) {
                return [];
            }

            // $limit is never 0 here, so a positive limit keeps only the first $limit parts
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // NOTE(review): this fallback quotes $pattern and therefore splits on the
        // literal text, while the mbstring branch hands it to mb_split() as a
        // regex - confirm which behavior is intended.

        // ask for one extra part, so that the unsplit remainder can be dropped afterwards
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
8469
8470
    /**
     * Check if the string starts with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for. An empty needle always matches.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Compare only the leading bytes: unlike "strpos() === 0" this does not
        // scan the rest of the haystack when the prefix does not match.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
8497
8498
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool whether or not $str starts with $substring;
     *              always false for an empty $str or an empty $substrings list
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference would only
        // force a copy of the array and leave a dangling reference behind
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8528
8529
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The text after the separator, or an empty string if $str / $separator
     *                is empty or the separator was not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper here as well, like the other
        // "str_substr_*_separator" methods do for non-default encodings
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8570
8571
    /**
     * Returns everything that follows the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if an argument is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings go through the class wrappers
            $position = self::strrpos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr(
                    $str,
                    $position + (int) self::strlen($separator, $encoding),
                    null,
                    $encoding
                );
        }

        // default encoding: use the native mbstring functions directly
        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        $start = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
8612
8613
    /**
     * Returns everything that precedes the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if an argument is empty or the separator does not occur.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings go through the class wrappers
            $position = self::strpos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // default encoding: use the native mbstring functions directly
        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $position);
    }
8658
8659
    /**
     * Returns everything that precedes the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if an argument is empty or the separator does not occur.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding name is normalized only for the final substr() call
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $normalized_encoding);
        }

        // default encoding: use the native mbstring functions directly
        $position = \mb_strrpos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
8703
8704
    /**
     * Returns the part of the string starting at the first occurrence of "$needle",
     * or the part in front of it when "$before_needle" is true.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if an argument is empty or the needle does not occur.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strstr($str, $needle, $before_needle, $encoding);
        } else {
            // passing "false" explicitly is the same as omitting the argument
            $found = \mb_strstr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
8750
8751
    /**
     * Returns the part of the string starting at the last occurrence of "$needle",
     * or the part in front of it when "$before_needle" is true.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if an argument is empty or the needle does not occur.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strrchr($str, $needle, $before_needle, $encoding);
        } else {
            // passing "false" explicitly is the same as omitting the argument
            $found = \mb_strrchr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
8797
8798
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to put in front of and behind $str.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>$str with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8813
8814
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain mb_* functions are only used when no language specific
        // handling and no length-preserving conversion was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare explicitly instead of relying on truthiness, so that the
        // separator string "0" is not silently ignored.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is a run of chars that are neither whitespace nor one of the extra separators
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // words from the ignore-list are kept untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8911
8912
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The entries are merged into the
     *                         small-word list and become part of the regular expressions.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments for the words that normally stay lower-case in a title
        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        if ($ignore !== []) {
            $small_words = \array_merge($small_words, $ignore);
        }

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // a string without any lower-case char is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
9103
9104
    /**
     * Get a binary representation of a specific string.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * The bytes of the string are read as one big-endian number and written in
     * base 2 without leading zeros ("0" for an empty or all-zero input).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The binary digits; "false" is kept in the signature for
     *                      backward compatibility only.</p>
     */
    public static function str_to_binary(string $str)
    {
        // Convert byte by byte instead of pushing the whole hex string through
        // base_convert(): that function works on floats internally and loses
        // precision as soon as the input is longer than a few bytes.
        $bits = '';
        $length = \strlen($str);
        for ($i = 0; $i < $length; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same notation as before: a plain number without leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9127
9128
    /**
     * Split a string into its lines.
     *
     * INFO: One or two consecutive CR / LF characters are consumed as a single
     *       line break, so "\r\n" (and also "\n\n") separates two lines only once.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split" and for a failing split
        $fallback = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        // only hand over to the filter helper if at least one filter is requested
        $needs_filtering = $remove_empty_values || $remove_short_values !== null;
        if (!$needs_filtering) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
9168
9169
    /**
     * Convert a string into an array of words.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * INFO: The string is split with "PREG_SPLIT_DELIM_CAPTURE", so (without any
     *       filtering) the words are located at the odd indexes and the text
     *       between the words at the even indexes of the result.
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        $char_list = self::rxClass($char_list, '\pL');

        $return = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($return === false) {
            return $remove_empty_values ? [] : [''];
        }

        if (
            $remove_short_values === null
            &&
            !$remove_empty_values
        ) {
            return $return;
        }

        $tmp_return = self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );

        // Write back by key instead of iterating by reference, so that no
        // dangling reference to the last element is left in the returned array.
        foreach ($tmp_return as $key => $item) {
            $tmp_return[$key] = (string) $item;
        }

        return $tmp_return;
    }
9220
9221
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are handed over unchanged to "UTF8::to_ascii()".
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9242
9243
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: If truncating occurs and the (non-negative) desired length leaves no
     *       room for anything besides $substring, then only $substring is
     *       returned (same behavior as "UTF8::str_truncate_safe()").
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $substring_length = (int) \mb_strlen($substring);

                // Without this guard the remaining length would become negative
                // and "mb_substr()" would cut from the end of the string instead,
                // which results in a string that is longer than requested.
                if ($length >= 0 && $length <= $substring_length) {
                    return $substring;
                }

                $length -= $substring_length;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $substring_length = (int) self::strlen($substring, $encoding);

            // same guard as in the UTF-8 branch above
            if ($length >= 0 && $length <= $substring_length) {
                return $substring;
            }

            $length -= $substring_length;
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9303
9304
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: An empty input, a length <= 0 or a length that leaves no room besides
     *       $substring results in $substring only.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>If true, a cut-off text without any space
     *                                                       in it is kept instead of being dropped. Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_*" functions directly

            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // reserve the space that is needed for the appended substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $truncated - needed for PhpStan (stubs error) */
            $truncated = \mb_substr($str, 0, $length);
            if ($truncated === false) {
                return '';
            }

            // a space directly behind the cut means that no word was split
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space !== $length) {
                // go back to the last space inside of the cut-off text
                $last_space = \mb_strrpos($truncated, ' ', 0);

                $cut_at_last_space = $last_space !== false
                                     ||
                                     ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

                if ($cut_at_last_space) {
                    $truncated = (string) \mb_substr($truncated, 0, (int) $last_space);
                }
            }

            return $truncated . $substring;
        }

        // slow path: use the encoding aware helpers of this class

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // reserve the space that is needed for the appended substring
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space !== $length) {
            // go back to the last space inside of the cut-off text
            $last_space = self::strrpos($truncated, ' ', 0, $encoding);

            $cut_at_last_space = $last_space !== false
                                 ||
                                 ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

            if ($cut_at_last_space) {
                $truncated = (string) self::substr($truncated, 0, (int) $last_space, $encoding);
            }
        }

        return $truncated . $substring;
    }
9410
9411
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: This is "UTF8::str_delimit()" with '_' as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
9428
9429
    /**
     * Returns an UpperCamelCase version of the supplied string: the input is
     * camelized via "UTF8::str_camelize()" and afterwards the first character
     * is upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9456
9457
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are handed over unchanged to "UTF8::ucfirst()".
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
9488
9489
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key<br>
     *                          every other value is handled like <strong>0</strong>
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string.</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the words are located at the odd indexes, the text between them at the even indexes
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        if ($format === 1) {
            $words = [];
            for ($index = 1; $index < $parts_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        if ($format === 2) {
            $words_by_offset = [];

            // the first word starts behind the leading non-word part
            $position = (int) self::strlen($parts[0]);
            for ($index = 1; $index < $parts_count; $index += 2) {
                $words_by_offset[$position] = $parts[$index];

                // skip the word itself and the non-word part that follows it
                $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
            }

            return $words_by_offset;
        }

        return (int) (($parts_count - 1) / 2);
    }
9546
9547
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
     *       case-folded via UTF8::strtocasefold() before they are compared.
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_str1, $folded_str2);
    }
9589
9590
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are handed over unchanged to "UTF8::strstr()".
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
9621
9622
    /**
     * Case-sensitive string comparison.
     *
     * INFO: Both strings are normalized (NFD) before they are compared, so
     *       canonically equivalent strings are reported as equal. A string that
     *       can not be normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        // "Normalizer::normalize()" does not return a string on failure; handing
        // that result over to "\strcmp()" would throw a TypeError (strict_types),
        // so we fall back to the unchanged input in this case.
        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if (!\is_string($normalized_str1)) {
            $normalized_str1 = $str1;
        }

        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if (!\is_string($normalized_str2)) {
            $normalized_str2 = $str2;
        }

        return \strcmp($normalized_str1, $normalized_str2);
    }
9648
9649
    /**
     * Find length of initial segment not matching mask.
     *
     * INFO: If $offset and / or $length is given, only that part of the string
     *       is inspected; 0 is returned if this part can not be extracted or
     *       if it is empty.
     *
     * @param string $str
     * @param string $char_list <p>The chars that terminate the initial segment.</p>
     * @param int    $offset
     * @param int    $length
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // nothing can terminate the segment, so it is the whole string
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        $has_range = $offset !== null || $length !== null;
        if ($has_range) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $str_part = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length !== null) {
                /** @noinspection UnnecessaryCastingInspection */
                $str_part = \mb_substr($str, (int) $offset, $length);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $str_part = \mb_substr($str, (int) $offset);
            }

            if ($str_part === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $str_part;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first char from the char-list
        $matches = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $matches)) {
            // no char from the char-list was found
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($matches[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9715
9716
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are handed over unchanged to "UTF8::stristr()".
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
9747
9748
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * INFO: every value is cast to an integer, converted into a numeric
     *       HTML-entity ("&#...;") and decoded via UTF8::html_entity_decode()
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @psalm-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single code point is handled like a list with one element
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        $entities = \array_map(
            /**
             * @param int|string $code_point
             *
             * @psalm-pure
             *
             * @return string
             */
            static function ($code_point): string {
                return '&#' . (int) $code_point . ';';
            },
            $code_points
        );

        return self::html_entity_decode(\implode('', $entities), \ENT_QUOTES | \ENT_HTML5);
    }
9781
9782
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * INFO: the string is checked against every known BOM from the internal BOM list
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the BOM strings (the keys) are needed here; iterating by value
        // avoids turning the entries of the static list into references.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            if (\strpos($str, (string) $bom_string) === 0) {
                return true;
            }
        }

        return false;
    }
9806
9807
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               You can use the optional second parameter to specify tags which should
     *                               not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string (before the tags are
     *                               stripped).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        // the native function is only called with the second argument if it was provided
        return $allowable_tags !== null
            ? \strip_tags($input, $allowable_tags)
            : \strip_tags($input);
    }
9851
9852
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without any "[[:space:]]" character.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9873
9874
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found (or if the haystack
     *                   or the needle is empty)
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl (a miss falls through to the next fallbacks)
        //

        $can_use_grapheme = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                            &&
                            $offset >= 0 // grapheme_stripos() can't handle negative offset
                            &&
                            self::$SUPPORT['intl'] === true;
        if ($can_use_grapheme) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9955
9956
    /**
9957
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
9958
     *
9959
     * EXAMPLE: <code>
9960
     * $str = 'iñtërnâtiônàlizætiøn';
9961
     * $search = 'NÂT';
9962
     *
9963
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
9964
     * UTF8::stristr($str, $search, true); // 'iñtër'
9965
     * </code>
9966
     *
9967
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
9968
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
9969
     * @param bool   $before_needle [optional] <p>
9970
     *                              If <b>TRUE</b>, it returns the part of the
9971
     *                              haystack before the first occurrence of the needle (excluding the needle).
9972
     *                              </p>
9973
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9974
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
9975
     *
9976
     * @psalm-pure
9977
     *
9978
     * @return false|string
9979
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
9980
     */
9981 12
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Case-insensitive search: returns $haystack from the first match of $needle
        // onwards (or the part before it when $before_needle is set), false if no match.
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning can leave an empty needle. Compare against '' explicitly:
        // a plain truthiness check would also treat the valid needle "0" as empty
        // and hand back the whole haystack instead of searching for it.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // The lazy group captures everything in front of the first
        // (case-insensitive, unicode-aware) occurrence of the needle.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Skip as many characters as the captured prefix is long.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
10053
10054
    /**
10055
     * Get the string length, not the byte-length!
10056
     *
10057
     * INFO: use UTF8::strwidth() for the char-length
10058
     *
10059
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
10060
     *
10061
     * @see http://php.net/manual/en/function.mb-strlen.php
10062
     *
10063
     * @param string $str        <p>The string being checked for length.</p>
10064
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10065
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10066
     *
10067
     * @psalm-pure
10068
     *
10069
     * @return false|int
10070
     *                   <p>
10071
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
10072
     *                   $encoding.
10073
     *                   (One multi-byte character counted as +1).
10074
     *                   <br>
10075
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
10076
     *                   chars.
10077
     *                   </p>
10078
     */
10079 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // An empty string has no characters, whatever the encoding is.
        if ($str === '') {
            return 0;
        }

        // 'UTF-8' and 'CP850' are compared literally further down, so only
        // other spellings are sent through the normalizer.
        $skip_normalizing = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$skip_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // 2) binary || ascii only: the byte count is used as the length
        //

        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strlen($str);
        }

        // Warn when a non UTF-8 charset is requested but neither extension is available.
        if ($encoding !== 'UTF-8') {
            if (
                self::$SUPPORT['mbstring'] === false
                &&
                self::$SUPPORT['iconv'] === false
            ) {
                /**
                 * @psalm-suppress ImpureFunctionCall - it is only a warning
                 */
                \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
            }
        }

        //
        // 3) iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // 4) intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // 5) ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // 6) vanilla php: count the matches of a one-character unicode pattern
        //

        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // No match at all for a non-empty string is reported as failure.
        return $char_count === 0 ? false : $char_count;
    }
10184
10185
    /**
10186
     * Get string length in byte.
10187
     *
10188
     * @param string $str
10189
     *
10190
     * @psalm-pure
10191
     *
10192
     * @return int
10193
     */
10194 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // With "mbstring.func_overload" active the "mb_" functions are available,
        // so the bytes are counted via the 8-bit charset CP850; otherwise plain strlen().
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10207
10208
    /**
10209
     * Case-insensitive string comparisons using a "natural order" algorithm.
10210
     *
10211
     * INFO: natural order version of UTF8::strcasecmp()
10212
     *
10213
     * EXAMPLES: <code>
10214
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
10215
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10216
     *
10217
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10218
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10219
     * </code>
10220
     *
10221
     * @param string $str1     <p>The first string.</p>
10222
     * @param string $str2     <p>The second string.</p>
10223
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10224
     *
10225
     * @psalm-pure
10226
     *
10227
     * @return int
10228
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
10229
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
10230
     *             <strong>0</strong> if they are equal
10231
     */
10232 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands first, then delegate to the natural-order comparison.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
10239
10240
    /**
10241
     * String comparisons using a "natural order" algorithm
10242
     *
10243
     * INFO: natural order version of UTF8::strcmp()
10244
     *
10245
     * EXAMPLES: <code>
10246
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
10247
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10248
     *
10249
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10250
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10251
     * </code>
10252
     *
10253
     * @see http://php.net/manual/en/function.strnatcmp.php
10254
     *
10255
     * @param string $str1 <p>The first string.</p>
10256
     * @param string $str2 <p>The second string.</p>
10257
     *
10258
     * @psalm-pure
10259
     *
10260
     * @return int
10261
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
10262
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
10263
     *             <strong>0</strong> if they are equal
10264
     */
10265 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Identical input needs no folding at all.
        if ($str1 === $str2) {
            return 0;
        }

        $folded_first = (string) self::strtonatfold($str1);
        $folded_second = (string) self::strtonatfold($str2);

        return \strnatcmp($folded_first, $folded_second);
    }
10276
10277
    /**
10278
     * Case-insensitive string comparison of the first n characters.
10279
     *
10280
     * EXAMPLE: <code>
10281
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
10282
     * </code>
10283
     *
10284
     * @see http://php.net/manual/en/function.strncasecmp.php
10285
     *
10286
     * @param string $str1     <p>The first string.</p>
10287
     * @param string $str2     <p>The second string.</p>
10288
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
10289
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10290
     *
10291
     * @psalm-pure
10292
     *
10293
     * @return int
10294
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10295
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10296
     *             <strong>0</strong> if they are equal
10297
     */
10298 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands, then compare only their first $len characters.
        //
        // $encoding is forwarded to strncmp() as well: otherwise the truncation would
        // always count characters as 'UTF-8' (the default of strncmp()), even when the
        // caller asked for a different charset for the case-folding step.
        // NOTE(review): assumes strtocasefold() keeps the string in $encoding - confirm.
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            $encoding
        );
    }
10310
10311
    /**
10312
     * String comparison of the first n characters.
10313
     *
10314
     * EXAMPLE: <code>
10315
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
10316
     * </code>
10317
     *
10318
     * @see http://php.net/manual/en/function.strncmp.php
10319
     *
10320
     * @param string $str1     <p>The first string.</p>
10321
     * @param string $str2     <p>The second string.</p>
10322
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
10323
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10324
     *
10325
     * @psalm-pure
10326
     *
10327
     * @return int
10328
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10329
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10330
     *             <strong>0</strong> if they are equal
10331
     */
10332 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Only spellings other than the literal 'UTF-8' / 'CP850' are normalized.
        $skip_normalizing = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$skip_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both strings down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head_first = (string) self::substr($str1, 0, $len, $encoding);
            $head_second = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head_first = (string) \mb_substr($str1, 0, $len);
            $head_second = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare what is left.
        return self::strcmp($head_first, $head_second);
    }
10352
10353
    /**
10354
     * Search a string for any of a set of characters.
10355
     *
10356
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
10357
     *
10358
     * @see http://php.net/manual/en/function.strpbrk.php
10359
     *
10360
     * @param string $haystack  <p>The string where char_list is looked for.</p>
10361
     * @param string $char_list <p>This parameter is case-sensitive.</p>
10362
     *
10363
     * @psalm-pure
10364
     *
10365
     * @return false|string
10366
     *                      <p>The string starting from the character found, or false if it is not found.</p>
10367
     */
10368 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Build a character-class pattern out of the list and look for its first hit.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Byte position of the matched character; everything from there on is returned.
        $first_hit_pos = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $first_hit_pos);
    }
10380
10381
    /**
10382
     * Find the position of the first occurrence of a substring in a string.
10383
     *
10384
     * INFO: use UTF8::strpos_in_byte() for the byte-length
10385
     *
10386
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
10387
     *
10388
     * @see http://php.net/manual/en/function.mb-strpos.php
10389
     *
10390
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10391
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
10392
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
10393
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10394
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10395
     *
10396
     * @psalm-pure
10397
     *
10398
     * @return false|int
10399
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
10400
     *                   string.<br> If needle is not found it returns false.
10401
     */
10402 53
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Returns the character position of the first occurrence of $needle in
        // $haystack (searching from $offset), or false if it is not found.
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the string. Translate it into the
        // equivalent absolute character position *before* shortening the haystack,
        // so the position found in the tail can be mapped back onto the original
        // string. (Resetting it to 0 afterwards would report a position that is
        // relative to the tail only.) Offsets reaching before the start are clamped to 0.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // Byte position inside the shortened haystack ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character count and shifted by the skipped characters.
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10540
10541
    /**
10542
     * Find the position of the first occurrence of a substring in a string.
10543
     *
10544
     * @param string $haystack <p>
10545
     *                         The string being checked.
10546
     *                         </p>
10547
     * @param string $needle   <p>
10548
     *                         The string to find in haystack.
10549
     *                         </p>
10550
     * @param int    $offset   [optional] <p>
10551
     *                         The search offset. If it is not specified, 0 is used.
10552
     *                         </p>
10553
     *
10554
     * @psalm-pure
10555
     *
10556
     * @return false|int
10557
     *                   <p>The numeric position of the first occurrence of needle in the
10558
     *                   haystack string. If needle is not found, it returns false.</p>
10559
     */
10560 2
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Nothing can be found in, or with, an empty string.
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // With "mbstring.func_overload" active the "mb_" functions are available,
        // so the byte position is searched via the 8-bit charset CP850.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
10573
10574
    /**
10575
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10576
     *
10577
     * @param string $haystack <p>
10578
     *                         The string being checked.
10579
     *                         </p>
10580
     * @param string $needle   <p>
10581
     *                         The string to find in haystack.
10582
     *                         </p>
10583
     * @param int    $offset   [optional] <p>
10584
     *                         The search offset. If it is not specified, 0 is used.
10585
     *                         </p>
10586
     *
10587
     * @psalm-pure
10588
     *
10589
     * @return false|int
10590
     *                   <p>The numeric position of the first occurrence of needle in the
10591
     *                   haystack string. If needle is not found, it returns false.</p>
10592
     */
10593 2
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Nothing can be found in, or with, an empty string.
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // With "mbstring.func_overload" active the "mb_" functions are available,
        // so the byte position is searched via the 8-bit charset CP850.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850')
            : \stripos($haystack, $needle, $offset);
    }
10606
10607
    /**
10608
     * Find the last occurrence of a character in a string within another.
10609
     *
10610
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10611
     *
10612
     * @see http://php.net/manual/en/function.mb-strrchr.php
10613
     *
10614
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10615
     * @param string $needle        <p>The string to find in haystack</p>
10616
     * @param bool   $before_needle [optional] <p>
10617
     *                              Determines which portion of haystack
10618
     *                              this function returns.
10619
     *                              If set to true, it returns all of haystack
10620
     *                              from the beginning to the last occurrence of needle.
10621
     *                              If set to false, it returns all of haystack
10622
     *                              from the last occurrence of needle to the end,
10623
     *                              </p>
10624
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10625
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10626
     *
10627
     * @psalm-pure
10628
     *
10629
     * @return false|string
10630
     *                      <p>The portion of haystack or false if needle is not found.</p>
10631
     */
10632 2
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // Only spellings other than the literal 'UTF-8' / 'CP850' are normalized.
        $skip_normalizing = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$skip_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only (native strrchr() has no "before needle" mode)
        //

        $is_binary_or_ascii = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_binary_or_ascii && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv, or 4) vanilla php
        //
        // Both remaining strategies search for the first character of the needle only
        // and differ just in how the last position of that character is determined.
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        // Either everything in front of the last occurrence, or everything from it onwards.
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10737
10738
    /**
10739
     * Reverses characters order in the string.
10740
     *
10741
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10742
     *
10743
     * @param string $str      <p>The input string.</p>
10744
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10745
     *
10746
     * @psalm-pure
10747
     *
10748
     * @return string
10749
     *                <p>The string with characters in the reverse sequence.</p>
10750
     */
10751 10
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // The string is passed through emoji_encode() before reversing and
        // through emoji_decode() afterwards.
        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk from the last character down to the first one
            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $cluster = \grapheme_substr($str, $index, 1);
                if ($cluster !== false) {
                    $result .= $cluster;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10795
10796
    /**
10797
     * Find the last occurrence of a character in a string within another, case-insensitive.
10798
     *
10799
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10800
     *
10801
     * @see http://php.net/manual/en/function.mb-strrichr.php
10802
     *
10803
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10804
     * @param string $needle        <p>The string to find in haystack.</p>
10805
     * @param bool   $before_needle [optional] <p>
10806
     *                              Determines which portion of haystack
10807
     *                              this function returns.
10808
     *                              If set to true, it returns all of haystack
10809
     *                              from the beginning to the last occurrence of needle.
10810
     *                              If set to false, it returns all of haystack
10811
     *                              from the last occurrence of needle to the end,
10812
     *                              </p>
10813
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10814
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10815
     *
10816
     * @psalm-pure
10817
     *
10818
     * @return false|string
10819
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10820
     */
10821 3
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Nothing can be found in (or with) an empty string.
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrichr($haystack, $needle, $before_needle);
            }

            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // "mb_strrichr()" looks for the complete needle, so the fallback has to do the same:
        // cutting the needle down to its first character (as the native "strrchr()" does)
        // would give different results depending on which extensions are installed.
        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        if ($before_needle) {
            // everything in front of the last occurrence
            return self::substr($haystack, 0, $pos, $encoding);
        }

        // from the last occurrence up to the end of the haystack
        return self::substr($haystack, $pos, null, $encoding);
    }
10876
10877
    /**
10878
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
10879
     *
10880
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
10881
     *
10882
     * @param string     $haystack   <p>The string to look in.</p>
10883
     * @param int|string $needle     <p>The string to look for.</p>
10884
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
10885
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10886
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10887
     *
10888
     * @psalm-pure
10889
     *
10890
     * @return false|int
10891
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
10892
     *                   string.<br>If needle is not found, it returns false.</p>
10893
     */
10894 14
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle,
        // so a non-negative integer is turned into a string via self::chr()
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8') {
            if (self::$SUPPORT['mbstring'] === false) {
                /**
                 * @psalm-suppress ImpureFunctionCall - is is only a warning
                 */
                \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
            }
        } elseif ($offset >= 0 && self::$SUPPORT['intl'] === true) {
            //
            // fallback via intl
            //
            // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset;
            //       a miss is not returned here, the remaining fallbacks are tried instead
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitively
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }
10994
10995
    /**
10996
     * Finds position of last occurrence of a string within another, case-insensitive.
10997
     *
10998
     * @param string $haystack <p>
10999
     *                         The string from which to get the position of the last occurrence
11000
     *                         of needle.
11001
     *                         </p>
11002
     * @param string $needle   <p>
11003
     *                         The string to find in haystack.
11004
     *                         </p>
11005
     * @param int    $offset   [optional] <p>
11006
     *                         The position in haystack
11007
     *                         to start searching.
11008
     *                         </p>
11009
     *
11010
     * @psalm-pure
11011
     *
11012
     * @return false|int
11013
     *                   <p>Return the numeric position of the last occurrence of needle in the
11014
     *                   haystack string, or false if needle is not found.</p>
11015
     */
11016 2
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or needle can never produce a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ("CP850"),
        // otherwise the native function is called directly.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
11029
11030
    /**
11031
     * Find the position of the last occurrence of a substring in a string.
11032
     *
11033
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
11034
     *
11035
     * @see http://php.net/manual/en/function.mb-strrpos.php
11036
     *
11037
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
11038
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
11039
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
11040
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
11041
     *                               the end of the string.
11042
     *                               </p>
11043
     * @param string     $encoding   [optional] <p>Set the charset.</p>
11044
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11045
     *
11046
     * @psalm-pure
11047
     *
11048
     * @return false|int
11049
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
11050
     *                   string.<br>If needle is not found, it returns false.</p>
11051
     */
11052 35
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle,
        // so a non-negative integer is turned into a string via self::chr()
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8') {
            if (self::$SUPPORT['mbstring'] === false) {
                /**
                 * @psalm-suppress ImpureFunctionCall - is is only a warning
                 */
                \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
            }
        } elseif ($offset >= 0 && self::$SUPPORT['intl'] === true) {
            //
            // fallback via intl
            //
            // INFO: "grapheme_strrpos()" can't handle other encodings or a negative offset;
            //       a miss is not returned here, the remaining fallbacks are tried instead
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Narrow the haystack according to the offset:
        // a positive offset drops leading characters (and is added back to the result),
        // a negative offset drops trailing characters (and then counts as offset 0).
        if ($offset !== 0) {
            if ($offset > 0) {
                $window = self::substr($haystack, $offset);
            } else {
                $window = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            if ($window !== null) {
                $haystack = $window === false ? '' : (string) $window;
            }
        }

        // byte position of the last match ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // ... converted into a character position by measuring everything in front of it
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
11175
11176
    /**
11177
     * Find the position of the last occurrence of a substring in a string.
11178
     *
11179
     * @param string $haystack <p>
11180
     *                         The string being checked, for the last occurrence
11181
     *                         of needle.
11182
     *                         </p>
11183
     * @param string $needle   <p>
11184
     *                         The string to find in haystack.
11185
     *                         </p>
11186
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
11187
     *                         the string. Negative values will stop searching at an arbitrary point
11188
     *                         prior to the end of the string.
11189
     *                         </p>
11190
     *
11191
     * @psalm-pure
11192
     *
11193
     * @return false|int
11194
     *                   <p>The numeric position of the last occurrence of needle in the
11195
     *                   haystack string. If needle is not found, it returns false.</p>
11196
     */
11197 2
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or needle can never produce a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ("CP850"),
        // otherwise the native function is called directly.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
11210
11211
    /**
11212
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
11213
     * mask.
11214
     *
11215
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
11216
     *
11217
     * @param string $str      <p>The input string.</p>
11218
     * @param string $mask     <p>The mask of chars</p>
11219
     * @param int    $offset   [optional]
11220
     * @param int    $length   [optional]
11221
     * @param string $encoding [optional] <p>Set the charset.</p>
11222
     *
11223
     * @psalm-pure
11224
     *
11225
     * @return false|int
11226
     */
11227 10
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut the input down to the requested window before matching.
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the longest leading run made of mask characters only.
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11258
11259
    /**
11260
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
11261
     *
11262
     * EXAMPLE: <code>
11263
     * $str = 'iñtërnâtiônàlizætiøn';
11264
     * $search = 'nât';
11265
     *
11266
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
11267
     * UTF8::strstr($str, $search, true); // 'iñtër'
11268
     * </code>
11269
     *
11270
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
11271
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
11272
     * @param bool   $before_needle [optional] <p>
11273
     *                              If <b>TRUE</b>, strstr() returns the part of the
11274
     *                              haystack before the first occurrence of the needle (excluding the needle).
11275
     *                              </p>
11276
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
11277
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
11278
     *
11279
     * @psalm-pure
11280
     *
11281
     * @return false|string
11282
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
11283
     */
11284 3
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Nothing can be found in (or with) an empty string.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8') {
            if (self::$SUPPORT['mbstring'] === false) {
                /**
                 * @psalm-suppress ImpureFunctionCall - is is only a warning
                 */
                \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            //
            // fallback via intl
            //
            // INFO: "grapheme_strstr()" can't handle other encodings;
            //       a miss is not returned here, the remaining fallbacks are tried instead
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // Lazily capture everything in front of the first occurrence of the (quoted) needle.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $hit);

        if (!isset($hit[1])) {
            return false;
        }

        $leading_part = $hit[1];

        if ($before_needle) {
            return $leading_part;
        }

        // skip the leading part, keep the needle and everything after it
        return self::substr($haystack, (int) self::strlen($leading_part));
    }
11380
11381
    /**
11382
     * Finds first occurrence of a string within another.
11383
     *
11384
     * @param string $haystack      <p>
11385
     *                              The string from which to get the first occurrence
11386
     *                              of needle.
11387
     *                              </p>
11388
     * @param string $needle        <p>
11389
     *                              The string to find in haystack.
11390
     *                              </p>
11391
     * @param bool   $before_needle [optional] <p>
11392
     *                              Determines which portion of haystack
11393
     *                              this function returns.
11394
     *                              If set to true, it returns all of haystack
11395
     *                              from the beginning to the first occurrence of needle.
11396
     *                              If set to false, it returns all of haystack
11397
     *                              from the first occurrence of needle to the end,
11398
     *                              </p>
11399
     *
11400
     * @psalm-pure
11401
     *
11402
     * @return false|string
11403
     *                      <p>The portion of haystack,
11404
     *                      or false if needle is not found.</p>
11405
     */
11406 2
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // An empty haystack or needle can never produce a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ("CP850"),
        // otherwise the native function is called directly.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
11422
11423
    /**
11424
     * Unicode transformation for case-less matching.
11425
     *
11426
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
11427
     *
11428
     * @see http://unicode.org/reports/tr21/tr21-5.html
11429
     *
11430
     * @param string      $str        <p>The input string.</p>
11431
     * @param bool        $full       [optional] <p>
11432
     *                                <b>true</b>, replace full case folding chars (default)<br>
11433
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
11434
     *                                </p>
11435
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11436
     * @param string      $encoding   [optional] <p>Set the charset.</p>
11437
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11438
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
11439
     *                                is for some languages better ...</p>
11440
     *
11441
     * @psalm-pure
11442
     *
11443
     * @return string
11444
     */
11445 32
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before any case conversion is applied
            $str = self::clean($str);
        }

        // pre-process the string via the shared case helper (direction + "full" flag)
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // A language hint or a non-UTF-8 charset is delegated to the methods of this class ...
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $lower
                ? self::strtolower($str, $encoding, false, $lang)
                : self::strtoupper($str, $encoding, false, $lang);
        }

        // ... plain UTF-8 goes straight to mbstring.
        return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
    }
11479
11480
    /**
11481
     * Make a string lowercase.
11482
     *
11483
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
11484
     *
11485
     * @see http://php.net/manual/en/function.mb-strtolower.php
11486
     *
11487
     * @param string      $str                           <p>The string being lowercased.</p>
11488
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11489
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11490
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11491
     *                                                   tr</p>
11492
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11493
     *                                                   -> ß</p>
11494
     *
11495
     * @psalm-pure
11496
     *
11497
     * @return string
11498
     *                <p>String with all alphabetic characters converted to lowercase.</p>
11499
     */
11500 73
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so force a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before the case conversion is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // load the list of known transliterator ids only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language independent transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11561
11562
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * INFO: if "$lang" is set and "intl" is available, the string is converted via the
     *       "<lang>-Upper" transliterator (with "Any-Upper" as fallback + a warning) and
     *       the "$encoding" parameter is not used on that code path.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: no language specific rules and the default encoding
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of known transliterator ids is loaded lazily
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: fall back to the language independent rules
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11643
11644
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * INFO: if "$to" is given, "$from" and "$to" are split into characters (when they are strings),
     *       truncated to the shorter of both lists and then combined into a "from => to" map.
     *       If "$to" is an empty string, a string "$from" is removed from "$str" and
     *       an array "$from" is used directly as "from => to" map.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException <p>If "$from" and "$to" could not be combined into a map.</p>
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // only pair up as many entries as both sides provide
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the result in its own variable, so that the error message can still show the input
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        // "$from" is only still a string if "$to" is an empty string
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11711
11712
    /**
     * Return the width of a string.
     *
     * INFO: use UTF8::strlen() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The width of the string, 0 for an empty string.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if (!$is_utf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the chars from the listed ranges and count them: each one is counted twice
        $wide_chars = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_chars);

        return ($wide_chars * 2) + (int) self::strlen($remaining);
    }
11771
11772
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * INFO: the implementations are tried in this order: mbstring, plain "substr()" (only for
     *       "CP850" / "ASCII"), intl, iconv, plain "substr()" (only for ASCII strings) and
     *       at last a pure php fallback.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        //
        // fallback via mbstring for every other encoding
        //

        // without this, a non UTF-8 string would fall into the fallbacks below,
        // which handle the input as UTF-8 (or with the default iconv encoding)
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr($str, $offset, $length, $encoding);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length" can't be 0 at this point, so only "null" is left)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
11935
11936
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * INFO: if "$offset" or "$length" is given, "$str1" is cut to that range and "$str2" is cut
     *       to the (character) length of the remaining "$str1" before both are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure "$str1" in the given encoding, otherwise "$str2" is cut at the wrong position
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11997
11998
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. It outputs a warning if the offset plus the length is
     *                             greater than the haystack length.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the haystack or the needle is an empty
     *                   string (or if the length of the haystack can't be determined).</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_utf8 = $encoding === 'UTF-8';

        // cut the haystack down to the requested range, before we count
        $needs_slice = $offset !== 0 || ($length !== null && $length > 0);
        if ($needs_slice) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $haystack = $is_utf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        $mbstring_support = self::$SUPPORT['mbstring'];

        if (!$is_utf8 && $mbstring_support === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($mbstring_support === true) {
            return $is_utf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: every match is one occurrence
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12087
12088
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // without "mbstring.func_overload" the native function already works on bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $is_invalid_range = $length !== 0
                                &&
                                $offset !== 0
                                &&
                                ($length + $offset) <= 0
                                &&
                                !Bootup::is_php('7.1'); // output from "substr_count()" have changed in PHP 7.1
            if ($is_invalid_range) {
                return false;
            }

            /** @var false|string $sliced - needed for PhpStan (stubs error) */
            $sliced = \substr($haystack, $offset, $length);
            $haystack = $sliced === false ? '' : (string) $sliced;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
12169
12170
    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $case_sensitive to false.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences, 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                // compare the case-folded variants of both strings
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        if (!$case_sensitive) {
            // compare the uppercase variants of both strings
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
12217
12218
    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it doesn't start
     *                with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12250
12251
    /**
     * Get part of a string process in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string, null === up to the end
     *                         of the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: omit the argument instead of passing a fixed maximum,
        // so the result isn't cut off for very long strings
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
12284
12285
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it doesn't end
     *                with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything before the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
12317
12318
    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it doesn't start
     *                with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12350
12351
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, the replacing
     *                                     ends at the end of the string. If it is not given and $str is an array,
     *                                     a length of 0 is used for every element, i.e. the replacement is
     *                                     inserted at the given offset (see the example above).</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException <p>If $offset or $length is an array while $str is not an array.</p>
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: "not given" means length 0 (insert) for arrays,
            // non-integer entries fall back to the number of input strings
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, element by element; the closure forwards the requested
            // encoding, which a plain callable passed to array_map() would drop
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only its first entry is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // resolve negative / missing / oversized lengths
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php: split both strings into characters and splice the arrays
        //

        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12506
12507
    /**
     * Strips the suffix ($needle) from the end of the string ($haystack), if it is present.
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to remove the suffix from.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with the
     *                needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // Empty haystack or empty needle: there is nothing to remove.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // The suffix check itself is a plain byte comparison.
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        // The cut, however, is done in characters: fast path for the default encoding ...
        if ($encoding === 'UTF-8') {
            $chars_to_keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $chars_to_keep);
        }

        // ... and the encoding-aware helpers for everything else.
        $chars_to_keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $chars_to_keep, $encoding);
    }
12556
12557
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);
        $byte_length = \strlen($str);

        // Fast path: "lower ^ upper ^ original" yields, byte by byte, whichever case variant
        // the original is NOT. That only works while all three strings share the same byte layout.
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // A case mapping changed the byte length (e.g. "ı" (2 bytes) -> "I" (1 byte)), so the
        // byte-wise XOR would be misaligned and truncated: swap character by character instead.
        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // not splittable as UTF-8: keep the previous (byte-wise) behaviour
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $char_lower = \mb_strtolower($char);
            // already lowercase (or caseless) -> uppercase it, otherwise lowercase it
            $swapped .= $char === $char_lower ? \mb_strtoupper($char) : $char_lower;
        }

        return $swapped;
    }
12589
12590
    /**
     * Tells whether "mbstring" or "iconv" functions are available without the matching PHP extension
     * being loaded, i.e. whether they are provided by a polyfill (e.g. the symfony-polyfills).
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        // "mb_strlen()" exists although the "mbstring" extension is not loaded
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');

        // "iconv()" exists although the "iconv" extension is not loaded
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12615
12616
    /**
     * Replaces every tab character in the string with $tab_length space characters.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that replace one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12636
12637
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // Language specific rules or length preservation are delegated to "str_titleize()".
        $needs_titleize = $lang !== null || $try_to_keep_the_string_length;
        if ($needs_titleize) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // Plain case: "mb_convert_case()" does the job, with a normalized encoding name if needed.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
12691
12692
    /**
     * Deprecated alias: forwards all arguments to "UTF8::to_ascii()".
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12713
12714
    /**
     * Deprecated alias: forwards the input to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12730
12731
    /**
     * Deprecated alias: forwards the input to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12747
12748
    /**
     * Deprecated alias: forwards the input to "UTF8::to_utf8()".
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
12764
12765
    /**
     * Convert a string into ASCII (delegates to "ASCII::to_transliterate()").
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12786
12787
    /**
     * Interprets the input as a boolean.
     *
     * Surrounding whitespace is ignored. The keywords "true", "1", "on", "yes" (any letter case) give true;
     * "false", "0", "off", "no" and the empty string give false. Any other numeric input is true only
     * if it is greater than zero. Every remaining non-empty input is true.
     *
     * @param bool|int|string $str
     *
     * @psalm-param bool|int|numeric-string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // Normalise once up front, so that padded keywords like " false " or " off "
        // are recognised instead of being treated as "some non-empty string".
        $str = \trim((string) $str);

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // all keys are lowercase, so one case-folded lookup covers every spelling
        $key = \strtolower($str);
        if (isset($map[$key])) {
            return $map[$key];
        }

        if (\is_numeric($str)) {
            return (float) $str > 0;
        }

        // anything else that is not empty counts as true
        return true;
    }
12832
12833
    /**
     * Convert given string to safe filename (and keep string case); delegates to "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12856
12857
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), keeping their keys.
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            foreach (\array_keys($str) as $key) {
                $str[$key] = self::to_iso8859($str[$key]);
            }

            return $str;
        }

        $str = (string) $str;

        // nothing to decode in an empty string
        return $str === '' ? '' : self::utf8_decode($str);
    }
12885
12886
    /**
     * Deprecated alias: forwards the input to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12902
12903
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * A string is passed to "UTF8::to_utf8_string()"; for an array every element is passed to
     * "UTF8::to_utf8_string()" and the array keys are kept.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @psalm-param TToUtf8 $str
     * @psalm-return TToUtf8
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @psalm-var TToUtf8 $str */
            $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $str;
        }

        // convert in place, element by element
        foreach (\array_keys($str) as $key) {
            $str[$key] = self::to_utf8_string($str[$key], $decode_html_entity_to_utf8);
        }

        return $str;
    }
12944
12945
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $converted = '';
        $pos = 0;

        // Step 1: walk over the bytes; keep what already looks like UTF-8, convert the rest.
        while ($pos < $byte_count) {
            $lead = $str[$pos];

            // 7-bit byte: it doesn't need conversion
            if ($lead < "\x80") {
                $converted .= $lead;
                ++$pos;

                continue;
            }

            // How long would a UTF-8 sequence starting with this byte be?
            // 0 === the byte cannot start a sequence (stray 0x80-0xBF or 0xF8-0xFF).
            if ($lead < "\xC0") {
                $sequence_length = 0;
            } elseif ($lead <= "\xDF") {
                $sequence_length = 2;
            } elseif ($lead <= "\xEF") {
                $sequence_length = 3;
            } elseif ($lead <= "\xF7") {
                $sequence_length = 4;
            } else {
                $sequence_length = 0;
            }

            // The lead byte must be followed by ($sequence_length - 1) bytes in the range 0x80-0xBF,
            // all of them inside the string.
            $looks_like_utf8 = $sequence_length > 0 && ($pos + $sequence_length) <= $byte_count;
            for ($k = 1; $looks_like_utf8 && $k < $sequence_length; ++$k) {
                $tail = $str[$pos + $k];
                $looks_like_utf8 = $tail >= "\x80" && $tail <= "\xBF";
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence
                $converted .= \substr($str, $pos, $sequence_length);
                $pos += $sequence_length;
            } else {
                // not valid UTF8 - convert this single byte and continue with the next one
                $converted .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // Step 2: decode unicode escape sequences + unicode surrogate pairs
        $decoded = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single escape sequence
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $code_point = ($high << 10) + $low + 0x10000 - (0xD800 << 10) - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | $code_point >> 6) . (string) self::chr(0x80 | $code_point & 0x3F);
                }

                return self::decimal_to_chr($code_point);
            },
            $converted
        );

        // the regex engine reported an error
        if ($decoded === null) {
            return '';
        }

        // Step 3 (optional): decode html-entities
        if ($decode_html_entity_to_utf8) {
            $decoded = self::html_entity_decode($decoded);
        }

        return $decoded;
    }
13078
13079
    /**
     * Casts a numeric string to an integer; anything that is not numeric yields null.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
13097
13098
    /**
     * Casts the input to a string, if it is a string, an integer, a float or an object
     * with a "__toString()" method; every other input yields null.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // scalars with a natural string form
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects only if they know how to stringify themselves
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource, other objects
        return null;
    }
13143
13144
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string means
     *                           "strip whitespace".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit check instead of a truthiness test: the char list "0" is a valid request
        // ("strip zeros") and must not be mistaken for "no chars given".
        $has_custom_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_custom_chars) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            // NOTE(review): "mb_ereg" patterns may treat "^" / "$" as line anchors - confirm the
            // behaviour for multi-line input.
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without "mbstring": same pattern, quoted for the "/" delimiter
        if ($has_custom_chars) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13190
13191
    /**
     * Upper-cases the first character of a string and leaves the rest untouched.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with its first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // optionally run the input through self::clean() before it is split
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used when no language-specific
        // handling and no length-preserving mode was requested
        $plain_mb = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            $head_upper = $plain_mb
                ? \mb_strtoupper($head)
                : self::strtoupper($head, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $head_upper . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb) {
            $head_upper = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $head_upper = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $head_upper . $tail;
    }
13267
13268
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::ucfirst()".
     *
     * @param string $str
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13289
13290
    /**
     * Uppercase for all words in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP but is valid input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // The native function is only an option when neither extra word-chars nor
        // (non-empty) exceptions were given; compare against '' so that a char list
        // or exception of "0" is not mistaken for "nothing given".
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // iterate by value: nothing is written back into $words
        foreach ($words as $word) {
            // skip only truly empty pieces; a "0" word must be kept
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
13358
13359
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // look for any marker that indicates encoded content
        $has_encoded_part = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_encoded_part = true;

                break;
            }
        }

        if (!$has_encoded_part) {
            // nothing to decode, only the simple UTF-8 fix is applied
            return self::fix_simple_utf8($str);
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One pass always runs; with $multi_decode the pass is repeated
        // until the string no longer changes.
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($previous),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $previous !== $decoded);

        return $decoded;
    }
13434
13435
    /**
     * Returns a lookup table: "urlencoded" Windows-1252 byte (e.g. "%FC") => UTF-8 character.
     *
     * The table covers %20 up to %FF.
     *
     * NOTE(review): the values for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD show up as
     * empty strings here; presumably some of them hold non-printable characters
     * (e.g. DEL, no-break space, soft hyphen) in the original file - confirm the raw bytes.
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            // Windows-1252 byte 0x80 is the euro sign (was mapped to a backtick before)
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
13673
13674
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Characters that have no single-byte representation are replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>When true, the untouched input is returned if the decoded
     *                                result is not shorter (per self::strlen) than the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // lazy-load the byte <-> ordinal lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // keep the input for the final comparison
        $original = $str;
        $byte_count = \strlen($str);
        $placeholder = '?';

        // The string is rewritten in place: $read walks the input bytes,
        // $write (always <= $read) marks where the next output byte goes.
        $write = 0;
        for ($read = 0; $read < $byte_count; ++$read, ++$write) {
            $lead = $str[$read];
            $high_nibble = $lead & "\xF0";

            if ($high_nibble === "\xC0" || $high_nibble === "\xD0") {
                // two-byte sequence: combine lead and continuation byte
                // (no bounds check: a truncated trailing sequence reads past the end)
                $code_point = (self::$ORD[$lead & "\x1F"] << 6) | self::$ORD[$str[++$read] & "\x3F"];
                $str[$write] = $code_point < 256 ? self::$CHR[$code_point] : $placeholder;
            } elseif ($high_nibble === "\xF0") {
                // four-byte sequence: not representable, skip the three following bytes
                $str[$write] = $placeholder;
                $read += 3;
            } elseif ($high_nibble === "\xE0") {
                // three-byte sequence: not representable, skip the two following bytes
                $str[$write] = $placeholder;
                $read += 2;
            } else {
                // everything else is copied as-is
                $str[$write] = $lead;
            }
        }

        /** @var false|string $decoded - needed for PhpStan (stubs error) */
        $decoded = \substr($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        if (!$keep_utf8_chars) {
            return $decoded;
        }

        if ((int) self::strlen($decoded) >= (int) self::strlen($original)) {
            return $original;
        }

        return $decoded;
    }
13750
13751
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        if (\extension_loaded('mbstring')) {
            // "utf8_encode()" is deprecated as of PHP 8.2; this is the documented equivalent
            /** @var array|false|string $encoded */
            $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        } else {
            // fallback without ext-mbstring
            /** @var false|string $encoded - the polyfill maybe return false */
            $encoded = \utf8_encode($str);
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if (!\is_string($encoded)) {
            return '';
        }

        return $encoded;
    }
13779
13780
    /**
     * Deprecated alias: forwards the string unchanged to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
13795
13796
    /**
     * Returns the class-level table of UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
13811
13812
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input itself if it has at most $limit words, otherwise the first
     *                $limit words (without trailing whitespace) followed by $str_add_on.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // leading whitespace + up to $limit "word + following whitespace" groups
        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        if (
            !isset($matches[0])
            ||
            \mb_strlen($str) === (int) \mb_strlen($matches[0])
        ) {
            // no match, or the match already covers the whole string: nothing was cut off
            return $str;
        }

        // With the "u" modifier "\s" also matches Unicode whitespace, which the byte-wise
        // "rtrim()" would leave in front of the add-on. Strip it with the same character
        // class first, then keep "rtrim()" for its additional default characters.
        $head = \preg_replace('/\\s++\\z/u', '', $matches[0]);
        if ($head === null) {
            $head = $matches[0];
        }

        return \rtrim($head) . $str_add_on;
    }
13846
13847
    /**
     * Wraps a string to a given number of characters
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // Build a single-byte "mask" of the input: '#' for an existing break,
        // ' ' for a space and '?' for any other character. The native wordwrap()
        // then runs on the mask, and its break positions are mapped back onto
        // the real characters collected in $chars.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $result = '';
        $char_index = 0;
        $mask_index = -1;
        $break_pos = -1;
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything in front of the current break position
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $result .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // an original break or a space at this position is consumed by the new break
            if (
                $break === $chars[$char_index]
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $result .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed above is the last line
        return $result . \implode('', $chars);
    }
13941
13942
    /**
     * Splits the string by "$delimiter" (newlines by default), wraps every part
     * on its own via "UTF8::wordwrap()" and joins the parts again.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // without a delimiter every kind of line ending separates the parts
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        // parts split on newlines are joined with "\n" again
        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
13995
13996
    /**
13997
     * Returns an array of Unicode White Space characters.
13998
     *
13999
     * @psalm-pure
14000
     *
14001
     * @return string[]
14002
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
14003
     */
14004 2
    public static function ws(): array
14005
    {
14006 2
        return self::$WHITESPACE;
14007
    }
14008
14009
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true for the empty string and for well-formed UTF-8, false otherwise.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // In strict mode, binary looking input that is detected as UTF-16 or
        // UTF-32 is rejected before any UTF-8 validation takes place.
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // With the /u modifier nothing at all matches when the subject is
            // not valid UTF-8, so matching a single leading character is
            // enough to validate the whole string.
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback without PCRE UTF-8 support: decode the string octet by octet.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // Lead octets: [detection mask, expected bits, payload mask, number of continuation octets].
        // The 5 and 6 octet forms are recognised on purpose: they are illegal,
        // but instead of resynchronising we consume the whole sequence and let
        // the checks at the end of the sequence reject it.
        $lead_octets = [
            [0xE0, 0xC0, 0x1F, 1],
            [0xF0, 0xE0, 0x0F, 2],
            [0xF8, 0xF0, 0x07, 3],
            [0xFC, 0xF8, 0x03, 4],
            [0xFE, 0xFC, 0x01, 5],
        ];

        // Smallest code point that may legally be encoded with the given number of octets.
        $shortest_form_minimum = [
            2 => 0x0080,
            3 => 0x0800,
            4 => 0x10000,
        ];

        $pending = 0; // continuation octets still expected for the current sequence
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        $byte_count = \strlen($str);
        for ($offset = 0; $offset < $byte_count; ++$offset) {
            $octet = self::$ORD[$str[$offset]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or a lead octet.
                if (($octet & 0x80) === 0) {
                    $sequence_length = 1;

                    continue;
                }

                $is_lead_octet = false;
                foreach ($lead_octets as $lead) {
                    if (($octet & $lead[0]) === $lead[1]) {
                        $pending = $lead[3];
                        $sequence_length = $pending + 1;
                        $code_point = ($octet & $lead[2]) << ($pending * 6);
                        $is_lead_octet = true;

                        break;
                    }
                }

                if (!$is_lead_octet) {
                    // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            if (($octet & 0xC0) !== 0x80) {
                // A continuation octet was expected: incomplete multi-octet sequence.
                return false;
            }

            // Legal continuation: merge its six payload bits into the code point.
            --$pending;
            $code_point |= ($octet & 0x0000003F) << ($pending * 6);

            if ($pending !== 0) {
                continue;
            }

            // End of the multi-octet sequence: check for illegal sequences and code points.
            if (
                // more than 4 octets is never legal
                $sequence_length > 4
                ||
                // From Unicode 3.1, non-shortest form is illegal
                $code_point < $shortest_form_minimum[$sequence_length]
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                ($code_point & 0xFFFFF800) === 0xD800
                ||
                // Code points outside the Unicode range are illegal.
                $code_point > 0x10FFFF
            ) {
                return false;
            }

            // reset the decoder state for the next character
            $code_point = 0;
            $sequence_length = 1;
        }

        // A sequence that is still open at the end of the input is invalid.
        return $pending === 0;
    }
14164
14165
    /**
     * Replaces the characters of the case-folding tables with their counterpart in the other case.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        // Common case folding: the direction decides which table is searched
        // and which one supplies the replacements.
        $common_search = $use_lowercase ? self::$COMMON_CASE_FOLD['upper'] : self::$COMMON_CASE_FOLD['lower'];
        $common_replace = $use_lowercase ? self::$COMMON_CASE_FOLD['lower'] : self::$COMMON_CASE_FOLD['upper'];

        $str = \str_replace($common_search, $common_replace, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * Loaded from the data directory on first use and kept for later calls.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Index 0 is searched and index 1 inserted when lowercasing, the other way round otherwise.
        $full_search = $FULL_CASE_FOLD[$use_lowercase ? 0 : 1];
        $full_replace = $FULL_CASE_FOLD[$use_lowercase ? 1 : 0];

        return \str_replace($full_search, $full_replace, $str);
    }
14218
14219
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns whatever that file returns.
     *
     * @param string $file <p>File name without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
14237
14238
    /**
     * Fills the emoji caches on first use.
     *
     * The emoji table is sorted by key length (longest key first), then the
     * key cache, the value cache and one reversible placeholder per key are
     * built from it.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true when the caches were built by this call, null when they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                // descending byte length
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji) {
            $checksum = \crc32($emoji);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14273
14274
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true when the MB_OVERLOAD_STRING bit is set in "mbstring.func_overload".</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // Without the constant there is nothing to test the setting against.
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
14295
14296
    /**
     * Returns the given strings as a re-indexed list without the unwanted entries.
     *
     * @param array    $strings
     * @param bool     $remove_empty_values <p>Drop entries that are empty after trim().</p>
     * @param int|null $remove_short_values <p>Drop entries whose mb_strlen() is less than or equal to
     *                                      this value, null disables the check.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        $kept = \array_filter(
            $strings,
            static function ($value) use ($remove_empty_values, $remove_short_values): bool {
                // the length check comes first, exactly like the emptiness check comes second
                if ($remove_short_values !== null && \mb_strlen($value) <= $remove_short_values) {
                    return false;
                }

                return !($remove_empty_values && \trim($value) === '');
            }
        );

        // array_filter() keeps the original keys, the result has to be a plain list
        return \array_values($kept);
    }
14337
14338
    /**
     * Builds a regular expression fragment that matches any of the given characters.
     *
     * Every element returned by self::str_split($s) is merged into one bracket
     * expression that starts with $class. Elements that are three or more bytes
     * long and are not a single character become separate alternatives of a
     * non-capturing group.
     *
     * @param string $s     <p>The characters the fragment has to match.</p>
     * @param string $class <p>Initial content of the bracket expression, inserted as it is.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // Index 0 always holds the content of the bracket expression.
        /** @var string[] $class_array */
        $class_array = [$class];

        // Iterate by value into a separate variable: the split result is a
        // temporary and the "$s" parameter must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes to the front of the bracket expression
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // fewer than three bytes: escape it for use inside "/.../"
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character made of three or more bytes
                $class_array[0] .= $char;
            } else {
                // anything longer becomes an alternative of its own
                $class_array[] = $char;
            }
        }

        // Plain truthiness check, kept as it is: an empty class stays empty and
        // a class consisting of "0" only is not wrapped into brackets.
        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14396
14397
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split at the delimiter and every part is passed to
     * self::ucfirst(), except for parts that are one of the special names,
     * parts that start with one of the special prefixes and - only for the
     * delimiter "-" - parts that start with one of the special names.
     *
     * @param string $names
     * @param string $delimiter <p>An empty delimiter results in an empty string.</p>
     * @param string $encoding  <p>Passed to self::strpos() for the prefix checks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split on an empty delimiter: before PHP 8 it
        // returns false (plus a warning), since PHP 8 it throws a ValueError.
        // Handle that case explicitly so the result is '' on every version.
        if ($delimiter === '') {
            return '';
        }

        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched. The special names only count
        // as beginnings when the parts were separated by "-".
        $protected_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
        }

        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($protected_beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // leave this part as it is and go on with the next one
                    continue 2;
                }
            }

            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
14497
14498
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of non-spacing marks
     * (\p{Mn}) is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the string could not be normalized or the replacement failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports a failure with a non-string result, which must
        // not reach preg_replace(): with strict types that is a TypeError.
        if (!\is_string($decomposed)) {
            return null;
        }

        return \preg_replace(
            '/\p{Mn}+/u',
            '',
            $decomposed
        );
    }
14516
14517
    /**
     * Converts one input byte into its UTF-8 representation.
     *
     * Bytes listed in the Windows-1252 table are replaced by the table entry,
     * every other byte is encoded as a two byte sequence (110xxxxx 10xxxxxx).
     *
     * @param int|string $input <p>Key of the "ord" lookup table - presumably a single byte, verify
     *                          against the callers.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return self::$WIN1252_TO_UTF8[$ordC1];
        }

        // The operands below are strings, so "|" and "&" work on their bytes.
        //
        // ">> 6" is the integer form of the former "/ 64": the division gave a
        // float for most values and a float used as array key is an implicit,
        // lossy conversion (deprecated since PHP 8.1). Assumes the "ord" table
        // holds non-negative integers - TODO confirm against the data file.
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0"; // lead byte: 110xxxxx
        $cc2 = ((string) $input & "\x3F") | "\x80"; // continuation byte: 10xxxxxx

        return $cc1 . $cc2;
    }
14556
14557
    /**
     * Rewrites "%uXXXX" escapes (three or four hex digits) into hexadecimal HTML entities ("&#xXXXX;").
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input itself when it contains no such escape.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite
        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
14575
}
14576