Passed
Push — master ( c96fda...7d85ee )
by Lars
08:58 queued 06:07
created

UTF8::str_snakeize()   B

Complexity

Conditions 6
Paths 3

Size

Total Lines 57
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 23
CRAP Score 6.0184

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 6
eloc 28
c 3
b 0
f 0
nc 3
nop 2
dl 0
loc 57
ccs 23
cts 25
cp 0.92
crap 6.0184
rs 8.8497

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
21
     * Bom => Byte-Length
22
     *
23
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
24
     *
25
     * @var array<string, int>
26
     */
27
    private static $BOM = [
28
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
29
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
30
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
31
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
32
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
33
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
34
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
35
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
36
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
37
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
38
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @phpstan-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @phpstan-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @phpstan-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @phpstan-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @phpstan-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @phpstan-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @phpstan-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @phpstan-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @phpstan-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @phpstan-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @phpstan-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @phpstan-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
     * Creates an instance without performing any initialisation.
     *
     * INFO: the constructor body is intentionally empty.
     */
    public function __construct()
    {
        // nothing to set up
    }
252
253
    /**
     * Picks a single character out of a string by its position ($str[1] like functionality).
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The zero-based position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string for empty input / negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing can be picked from an empty string or from a negative position
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to "mb_substr()", every other charset via the class helper
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
279
280
    /**
     * Returns the string with a leading UTF-8 BOM.
     *
     * INFO: If the string already has a BOM (as reported by UTF8::string_has_bom()), it is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
302
303
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * The values are copied over untouched; only the keys are converted via
     * UTF8::strtolower() / UTF8::strtoupper().
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default).
     *                                       Any other value is treated as CASE_LOWER.</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return mixed[]
     *                 <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // unknown flags silently fall back to lower-casing
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: the values are only read, so no reference is needed
        // (a by-reference loop would leave a dangling reference behind)
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Extracts the text located between the first $start delimiter (searched from $offset)
     * and the next $end delimiter after it.
     *
     * An empty string is returned if either delimiter is not found, or if nothing
     * lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // the fast path (plain "mb_*" calls) is chosen by the encoding exactly as passed in
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $from = $is_utf8
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($from === false) {
            return '';
        }

        // skip the start delimiter itself
        $from += $is_utf8
            ? (int) \mb_strlen($start)
            : (int) self::strlen($start, $encoding);

        $to = $is_utf8
            ? \mb_strpos($str, $end, $from)
            : self::strpos($str, $end, $from, $encoding);
        if ($to === false || $to === $from) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, $from, $to - $from);
        }

        return (string) self::substr($str, $from, $to - $from, $encoding);
    }
406
407
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * The input is read as one big binary number: leading zero bits are ignored,
     * the remaining bits are left-padded to a full byte and converted byte by byte.
     * This avoids the precision loss of "base_convert()" on long inputs and keeps
     * values with a leading zero nibble intact (e.g. '1010' => "\n").
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes, or an empty string for empty / all-zero input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // keep binary digits only: anything else carries no value in base 2
        $bits = (string) \preg_replace('/[^01]/', '', (string) $bin);

        // leading zeros do not change the number; an all-zero input yields an empty string
        $bits = \ltrim($bits, '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8 so that every chunk is exactly one byte
        $missing_bits = (8 - (\strlen($bits) % 8)) % 8;
        $bits = \str_repeat('0', $missing_bits) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $octet) {
            $str .= \chr((int) \bindec($octet));
        }

        return $str;
    }
433
434
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at the UTF8::$BOM list for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark (3 bytes).</p>
     */
    public static function bom(): string
    {
        // bytes: EF BB BF
        return "\xEF\xBB\xBF";
    }
450
451
    /**
     * Applies a callback to every character of a string.
     *
     * @alias of UTF8::chr_map()
     *
     * @param callable $callback <p>Forwarded unchanged to UTF8::chr_map().</p>
     * @param string   $str      <p>Forwarded unchanged to UTF8::chr_map().</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
467
468
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // non UTF-8 charsets are delegated to the encoding-aware helper
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
488
489
    /**
     * Returns an array consisting of the characters in the string.
     *
     * INFO: thin wrapper around UTF8::str_split() with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $chars */
        $chars = self::str_split($str);

        return $chars;
    }
504
505
    /**
     * This method will auto-detect your server environment for UTF-8 support.
     *
     * The results are stored in the static $SUPPORT list; the detection runs only once.
     *
     * @return true|null
     *                   <p>true if the detection was executed by this call, null if it was already done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // already detected? -> nothing to do
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill also provides "mb_internal_encoding()", so set it to UTF-8 as well
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
561
562
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * Results are memoized in a function-local static cache, keyed by
     * "<code point>_<encoding>".
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null if the code point is not a positive integer.</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // the parameter is untyped, so reject non-integers and non-positive values here
        if (!\is_int($code_point) || $code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only U+0001 - U+007F are single-byte in UTF-8, so only those may be taken
        // directly from the byte table. U+0080 already needs two bytes ("\xC2\x80")
        // and must not be returned as the lone continuation byte "\x80".
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php: build the UTF-8 byte sequence by hand
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
687
688
    /**
     * Applies callback to all characters of a string.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function, called once per character.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        // split first, then map the callback over the resulting pieces
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
708
709
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // pick the byte counter first, then split the string
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byte_length = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, self::str_split($str));
    }
744
745
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * If iconv is available the character is converted to UCS-4LE and unpacked,
     * otherwise the UTF-8 byte sequence is decoded by hand.
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point, or 0 for an empty string (like the native "ord()").</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // an empty string has no first byte: neither "unpack()" nor "$char[0]" can handle it
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells how many bytes the sequence has; strip its length marker bits
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
797
798
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional]
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the "&#0;" entity is handed to UTF8::ord() as an empty string
        $input = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($input);

        return self::int_to_hex($code_point, $prefix);
    }
823
824
    /**
     * alias for "UTF8::chr_to_decimal()"
     *
     * @param string $chr <p>Forwarded unchanged to UTF8::chr_to_decimal().</p>
     *
     * @psalm-pure
     *
     * @return int
     *
     * @see        UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
840
841
    /**
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
     *
     * INFO: the chunks are joined with $end, so nothing is appended after the last chunk.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
859
860
    /**
861
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
862
     *
863
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
864
     *
865
     * @param string $str                                     <p>The string to be sanitized.</p>
866
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
867
     *                                                        UTF-BOM.</p>
868
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
869
     *                                                        whitespace.</p>
870
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
871
     *                                                        Word chars e.g.: "…"
872
     *                                                        => "..."</p>
873
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
874
     *                                                        in
875
     *                                                        combination with
876
     *                                                        $normalize_whitespace</p>
877
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
878
     *                                                        question mark e.g.: "�"</p>
879
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
880
     *                                                        invisible characters e.g.: "\0"</p>
881
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you want to remove
882
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
883
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
884
     *                                                        </p>
885
     *
886
     * @psalm-pure
887
     *
888
     * @return string
889
     *                <p>A clean UTF-8 encoded string.</p>
890
     *
891
     * @noinspection PhpTooManyParametersInspection
892
     */
893 90
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // Pattern layout: capture group 1 matches a run of well-formed byte
        // sequences, groups 2 and 3 match a single stray byte. Replacing every
        // match with '$1' therefore keeps the runs and drops the stray bytes.
        //
        // Original notes:
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings
        $byte_sequence_rx = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($byte_sequence_rx, '$1', $str);

        // The optional steps below run in this fixed order; each one receives
        // the output of the step before it.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
942
943
    /**
     * Repair simple encoding mistakes in a string and then sanitize it via UTF8::clean()
     * with a fixed set of options (see the method body for the exact flags).
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string (any other value is cast to string first).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fix_simple_utf8() runs first (original note: "fixed ISO <-> UTF-8 Errors").
        $repaired = self::fix_simple_utf8($input);

        // clean() always strips non UTF-8 bytes and, by its own default,
        // invisible characters (e.g. "\0"); the flags below add the rest.
        return self::clean(
            $repaired,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark
        );
    }
980
981
    /**
     * Convert a string, or an array of strings, into an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If true, the code points are returned in U+xxxx format,
     *                                     otherwise (default) as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array is returned for an empty array or for any input
     *                        that is neither a string nor an array.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is first broken up via str_split(); arrays are used as given.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        // Each element is mapped through UTF8::ord().
        $code_points = \array_map([self::class, 'ord'], $chars);

        if (!$use_u_style) {
            return $code_points;
        }

        // U+xxxx notation is produced by UTF8::int_to_hex().
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1042
1043
    /**
     * Trim the string and squeeze every run of whitespace into one single space.
     * According to the original documentation this covers tabs and newlines as well
     * as multibyte whitespace such as the thin space and the ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string with condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        // Without mbstring support the class' own regex helper does the replacement.
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $condensed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($condensed);
    }
1064
1065
    /**
     * Count how often each character occurs in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Forwarded unchanged to UTF8::str_split();
     *                                        presumably set to false, if you don't want to use the
     *                                        "mb_*" functions there.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array with the characters as keys and
     *               their number of occurrences as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into chunks of length 1, then tally the chunks.
        $single_chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($single_chars);
    }
1094
1095
    /**
     * Build a valid CSS identifier, e.g. for "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" applied via str_replace() before
     *                             invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>If true, strip_tags() is applied to the string.</p>
     * @param bool     $strtolower <p>If true (default), strtolower() is applied to the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated identifier,
        // everything else is passed through clean() first.
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // We could also use strtr() here but it is much slower than str_replace().
        // To let '__' survive the filter step it is swapped for the placeholder
        // '##' first, unless '__' itself is defined as a filter.
        $placeholder_hits = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $placeholder_hits);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Restore the placeholder only if a '__' was actually swapped out above.
        if ($placeholder_hits > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Characters kept by the expression below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters from U+00A1 up to U+FFFF
        // Everything else is stripped.
        $identifier = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $identifier = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $identifier);

        return \trim($identifier, '-');
    }
1170
1171
    /**
     * Remove css media-queries (the whole "@media ... { ... }" block) from a string.
     *
     * @param string $str <p>The css source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // Flags: case-insensitive, dot matches newlines, UTF-8, multiline, ungreedy.
        $media_query_rx = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        return (string) \preg_replace($media_query_rx, '', $str);
    }
1188
1189
    /**
     * Tell whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1203
1204
    /**
     * Turn a decimal code point into the matching UTF-8 character.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal value of the character.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        // Wrap the number into a numeric HTML entity and let the entity decoder do the work.
        $numeric_entity = "&#{$int};";

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1223
1224
    /**
     * Decode a MIME header field via iconv_mime_decode() in "continue on error" mode.
     *
     * @param string $str      <p>The encoded header.</p>
     * @param string $encoding [optional] <p>The charset of the result; every value other than
     *                         'UTF-8' and 'CP850' is normalized first.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1245
1246
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * The flag is built from two code points: each letter A-Z is shifted into the
     * block that starts at U+1F1E6, i.e. "A" maps to U+1F1E6, "B" to U+1F1E7, ...
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE (lower-case letters are accepted as well)</p>
     *
     * @return string
     *                <p>Emoji or empty string on error, i.e. whenever the input
     *                is not made of exactly two ASCII letters.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only two ASCII letters can be mapped. Checking merely the character
        // count would let digits or punctuation through (yielding unrelated code
        // points) and would byte-index into multibyte characters below.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41; // code point of "A"

        // Byte indexing is safe here: the validation above guarantees single-byte letters.
        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1274
1275
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // Both modes replace towards the same emoji values; only the key set differs.
        $placeholders = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1315
1316
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // Both modes search for the same emoji values; only the replacement set differs.
        $placeholders = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $placeholders,
            $str
        );
    }
1356
1357
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true (default), the current
     *                                              encoding of the string is auto-detected and a detected
     *                                              value overrides $from_encoding; if nothing is detected
     *                                              the string is converted via UTF8::to_utf8().</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              An empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException <p>If the target is 'JSON' and the string can not be json-encoded.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The converted string; the input is returned unchanged when it is empty,
     *                when no target encoding is given, when source and target encoding are equal
     *                or when no conversion succeeded.</p>
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target given.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are taken as-is, every other name is normalized first.
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Explicitly given, identical source and target: no work to do.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: JSON, BASE64, HTML-ENTITIES -------------------
        // As a target they return right away; as a source the string is decoded
        // and the source encoding is reset so the detection below kicks in.

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): in strict mode base64_decode() returns false for invalid
            // input and that value is passed on unchanged - confirm this is intended.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // --- source encoding detection ---------------------------------------

        $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // --- dedicated converters for the common pairs -----------------------

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion: mbstring first, iconv second ----------------

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $mb_result = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // A falsy result falls through to the iconv attempt below.
            if ($mb_result) {
                \assert(\is_string($mb_result));

                return $mb_result;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $iconv_result = @\iconv($from_encoding, $to_encoding, $str);

        // Last resort: hand back the (possibly decoded) string unconverted.
        return $iconv_result !== false ? $iconv_result : $str;
    }
1534
1535
    /**
     * Encode a string as a MIME header field value via iconv_mime_encode(),
     * using an empty field name.
     *
     * @param string $str               <p>The string to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent            [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are taken as-is, every other charset name is normalized first.
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1578
1579
    /**
1580
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
1581
     *
1582
     * @param string   $str                       <p>The input string.</p>
1583
     * @param string   $search                    <p>The searched string.</p>
1584
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1585
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1586
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1587
     *
1588
     * @psalm-pure
1589
     *
1590
     * @return string
1591
     */
1592 1
    public static function extract_text(
1593
        string $str,
1594
        string $search = '',
1595
        int $length = null,
1596
        string $replacer_for_skipped_text = '…',
1597
        string $encoding = 'UTF-8'
1598
    ): string {
1599 1
        if ($str === '') {
1600 1
            return '';
1601
        }
1602
1603 1
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
1604
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
1605
        }
1606
1607 1
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
1608
1609 1
        if ($length === null) {
1610 1
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
1611
        }
1612
1613 1
        if ($search === '') {
1614 1
            if ($encoding === 'UTF-8') {
1615 1
                if ($length > 0) {
1616 1
                    $string_length = (int) \mb_strlen($str);
1617 1
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1618
                } else {
1619 1
                    $end = 0;
1620
                }
1621
1622 1
                $pos = (int) \min(
1623 1
                    \mb_strpos($str, ' ', $end),
1624 1
                    \mb_strpos($str, '.', $end)
1625
                );
1626
            } else {
1627
                if ($length > 0) {
1628
                    $string_length = (int) self::strlen($str, $encoding);
1629
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1630
                } else {
1631
                    $end = 0;
1632
                }
1633
1634
                $pos = (int) \min(
1635
                    self::strpos($str, ' ', $end, $encoding),
1636
                    self::strpos($str, '.', $end, $encoding)
1637
                );
1638
            }
1639
1640 1
            if ($pos) {
1641 1
                if ($encoding === 'UTF-8') {
1642 1
                    $str_sub = \mb_substr($str, 0, $pos);
1643
                } else {
1644
                    $str_sub = self::substr($str, 0, $pos, $encoding);
1645
                }
1646
1647 1
                if ($str_sub === false) {
1648
                    return '';
1649
                }
1650
1651 1
                return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1652
            }
1653
1654
            return $str;
1655
        }
1656
1657 1
        if ($encoding === 'UTF-8') {
1658 1
            $word_position = (int) \mb_stripos($str, $search);
1659 1
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
1660
        } else {
1661
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
1662
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
1663
        }
1664
1665 1
        $pos_start = 0;
1666 1
        if ($half_side > 0) {
1667 1
            if ($encoding === 'UTF-8') {
1668 1
                $half_text = \mb_substr($str, 0, $half_side);
1669
            } else {
1670
                $half_text = self::substr($str, 0, $half_side, $encoding);
1671
            }
1672 1
            if ($half_text !== false) {
1673 1
                if ($encoding === 'UTF-8') {
1674 1
                    $pos_start = (int) \max(
1675 1
                        \mb_strrpos($half_text, ' '),
1676 1
                        \mb_strrpos($half_text, '.')
1677
                    );
1678
                } else {
1679
                    $pos_start = (int) \max(
1680
                        self::strrpos($half_text, ' ', 0, $encoding),
1681
                        self::strrpos($half_text, '.', 0, $encoding)
1682
                    );
1683
                }
1684
            }
1685
        }
1686
1687 1
        if ($word_position && $half_side > 0) {
1688 1
            $offset = $pos_start + $length - 1;
1689 1
            $real_length = (int) self::strlen($str, $encoding);
1690
1691 1
            if ($offset > $real_length) {
1692
                $offset = $real_length;
1693
            }
1694
1695 1
            if ($encoding === 'UTF-8') {
1696 1
                $pos_end = (int) \min(
1697 1
                    \mb_strpos($str, ' ', $offset),
1698 1
                    \mb_strpos($str, '.', $offset)
1699 1
                ) - $pos_start;
1700
            } else {
1701
                $pos_end = (int) \min(
1702
                    self::strpos($str, ' ', $offset, $encoding),
1703
                    self::strpos($str, '.', $offset, $encoding)
1704
                ) - $pos_start;
1705
            }
1706
1707 1
            if (!$pos_end || $pos_end <= 0) {
1708 1
                if ($encoding === 'UTF-8') {
1709 1
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
1710
                } else {
1711
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
1712
                }
1713 1
                if ($str_sub !== false) {
1714 1
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
1715
                } else {
1716 1
                    $extract = '';
1717
                }
1718
            } else {
1719 1
                if ($encoding === 'UTF-8') {
1720 1
                    $str_sub = \mb_substr($str, $pos_start, $pos_end);
1721
                } else {
1722
                    $str_sub = self::substr($str, $pos_start, $pos_end, $encoding);
1723
                }
1724 1
                if ($str_sub !== false) {
1725 1
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1726
                } else {
1727 1
                    $extract = '';
1728
                }
1729
            }
1730
        } else {
1731 1
            $offset = $length - 1;
1732 1
            $true_length = (int) self::strlen($str, $encoding);
1733
1734 1
            if ($offset > $true_length) {
1735
                $offset = $true_length;
1736
            }
1737
1738 1
            if ($encoding === 'UTF-8') {
1739 1
                $pos_end = (int) \min(
1740 1
                    \mb_strpos($str, ' ', $offset),
1741 1
                    \mb_strpos($str, '.', $offset)
1742
                );
1743
            } else {
1744
                $pos_end = (int) \min(
1745
                    self::strpos($str, ' ', $offset, $encoding),
1746
                    self::strpos($str, '.', $offset, $encoding)
1747
                );
1748
            }
1749
1750 1
            if ($pos_end) {
1751 1
                if ($encoding === 'UTF-8') {
1752 1
                    $str_sub = \mb_substr($str, 0, $pos_end);
1753
                } else {
1754
                    $str_sub = self::substr($str, 0, $pos_end, $encoding);
1755
                }
1756 1
                if ($str_sub !== false) {
1757 1
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1758
                } else {
1759 1
                    $extract = '';
1760
                }
1761
            } else {
1762 1
                $extract = $str;
1763
            }
1764
        }
1765
1766 1
        return $extract;
1767
    }
1768
1769
    /**
     * Reads an entire file into a string and, by default, converts text content to clean UTF-8.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * INFO: The filename is passed through FILTER_SANITIZE_STRING before it is used.
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>Name of the file to read.</p>
     * @param bool          $use_include_path [optional] <p>Forwarded as-is to the native file_get_contents().</p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with stream_context_create().
     *                                        If it is null and $timeout is non-zero, a context carrying
     *                                        ['http' => ['timeout' => $timeout]] is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>The offset where the reading starts; null means 0.</p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. With null the file is read until
     *                                        its end.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout (HTTP stream context only).</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that
     *                                        is detected as binary (and is neither UTF-16 nor UTF-32) will not
     *                                        be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        An empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        // Build a default context only if the caller supplied none and a timeout is requested.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $start = $offset ?? 0;

        // The length argument is only forwarded when the caller actually provided one.
        $data = $max_length === null
            ? \file_get_contents($filename, $use_include_path, $context, $start)
            : \file_get_contents($filename, $use_include_path, $context, $start, $max_length);

        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Convert when the data is not binary, or when it is binary-looking UTF-16 / UTF-32.
        $is_convertible = !self::is_binary($data, true)
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1871
1872
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * INFO: The whole file is read and the check itself is delegated to "UTF8::string_has_bom()".
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1895
1896
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * - arrays and objects: every element / iterable property is filtered recursively
     * - strings: line endings are normalized if the string contains "\r"; non-ASCII strings are
     *   normalized with "\Normalizer" and re-encoded via "UTF8::encode()" if the normalizer yields nothing
     * - everything else is returned unchanged
     *
     * @param array|object|string $var                <p>The value to filter.</p>
     * @param int                 $normalization_form <p>A "\Normalizer" form constant.</p>
     * @param string              $leading_combining  <p>Prefix that is added if the result starts with a
     *                                                combining mark (\p{Mn}); an empty string disables it.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        if (\is_array($var) || \is_object($var)) {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif (\is_string($var)) {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                // '-' is only a non-empty marker: "already normalized, nothing went wrong".
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // No usable normalizer result: fall back to a re-encode of the input.
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $needs_prefix = $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($needs_prefix) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1971
1972
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and passes the result
     * through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>Name of a variable to get.</p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      With null the native function is called without options.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // An omitted $options is null as well, so one check covers both cases.
        $var = $options === null
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($var);
    }
2027
2028
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int            $type       <p>
     *                                   One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                   <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                   <b>INPUT_ENV</b>.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a filter type, or
     *                                   an array optionally specifying the filter, flags and options. If the value
     *                                   is an array, valid keys are "filter" (the filter type), "flags" (flags
     *                                   that apply to the filter) and "options" (options that apply to the filter).
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all
     *                                   values in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   With null the native function is called with $type only.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value
     *                                   (only forwarded when $definition is not null).
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // An omitted $definition is null as well, so one check covers both cases.
        $values = $definition === null
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($values);
    }
2089
2090
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter and passes the result through "UTF8::filter()".
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param array<mixed>|int|null $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        </p>
     *                                        <p>
     *                                        <b>null</b> (omitted or passed explicitly) means "no options".
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        // other options here
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = UTF8::filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = UTF8::filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = UTF8::filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        $var = UTF8::filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The native \filter_var() only accepts array|int as options: forwarding an explicit null
        // throws a TypeError on PHP >= 8 (strict_types). Treat null like an omitted argument, the
        // same way "UTF8::filter_input()" does.
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2169
2170
    /**
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables, optionally filters them and passes the result through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'foo@bar.de'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@bar.de']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter
     *                                   which specifies the filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter. See the example above for a better understanding.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   <b>null</b> (omitted or passed explicitly) means "no definition":
     *                                   the native function is called with $data only.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value
     *                                   (only forwarded when $definition is not null).
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // The native \filter_var_array() only accepts array|int as definition: forwarding an explicit
        // null throws a TypeError on PHP >= 8 (strict_types). Treat null like an omitted argument,
        // the same way "UTF8::filter_input_array()" does.
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2240
2241
    /**
     * Checks whether the "finfo" class is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        $finfo_class = \finfo::class;

        return \class_exists($finfo_class);
    }
2255
2256
    /**
     * Returns the first $n characters of the string.
     *
     * EXAMPLE: <code>UTF8::first_char('fòôbàř', 3); // 'fòô'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start;
     *                         values <= 0 result in an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other charset through the wrapper.
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2282
2283
    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
     *
     * EXAMPLE: <code>
     * UTF8::fits_inside('κόσμε', 6); // true
     * UTF8::fits_inside('κόσμε', 4); // false
     * </code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
2300
2301
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * INFO: This is a plain "str_replace()" with the "utf8_fix" data table; the table is loaded once
     * and then cached for all following calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Broken byte sequences to search for (the keys of the fix table).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * The fix table itself; its values are used as replacements.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        if ($search_cache === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $replace_cache = self::$BROKEN_UTF8_FIX;
            $search_cache = \array_keys($replace_cache);
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }
2351
2352
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
     *
     * INFO: The string is decoded and re-encoded ("UTF8::utf8_decode()" + "UTF8::to_utf8()")
     * until it does not change anymore; arrays are processed recursively.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (!\is_array($str)) {
            $current = (string) $str;
            $previous = '';

            // Repeat until a pass leaves the string untouched.
            while ($previous !== $current) {
                $previous = $current;
                /**
                 * @psalm-suppress PossiblyInvalidArgument
                 */
                $current = self::to_utf8(
                    self::utf8_decode($current, true)
                );
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $current;
        }

        foreach ($str as &$item) {
            $item = self::fix_utf8($item);
        }
        unset($item);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2398
2399
    /**
     * Get the text direction of a specific character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * If "IntlChar" is available its result is used first; when it reports a
     * direction class that is in neither list below, the code falls through
     * to the hard-coded code point ranges.
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $tmp_return = \IntlChar::charDirection($char);

            // from "IntlChar"-Class
            $char_direction = [
                'RTL' => [1, 13, 14, 15, 21],
                'LTR' => [0, 11, 12, 20],
            ];

            if (\in_array($tmp_return, $char_direction['LTR'], true)) {
                return 'LTR';
            }

            if (\in_array($tmp_return, $char_direction['RTL'], true)) {
                return 'RTL';
            }
        }

        // the class is final, so "self::" (as used in the rest of the class) is sufficient here
        $c = self::chr_to_decimal($char);

        // everything outside of this window is treated as left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        if ($c <= 0x85e) {
            // lower block of right-to-left code points (0x5be - 0x85e)
            if ($c === 0x5be ||
                $c === 0x5c0 ||
                $c === 0x5c3 ||
                $c === 0x5c6 ||
                ($c >= 0x5d0 && $c <= 0x5ea) ||
                ($c >= 0x5f0 && $c <= 0x5f4) ||
                $c === 0x608 ||
                $c === 0x60b ||
                $c === 0x60d ||
                $c === 0x61b ||
                ($c >= 0x61e && $c <= 0x64a) ||
                ($c >= 0x66d && $c <= 0x66f) ||
                ($c >= 0x671 && $c <= 0x6d5) ||
                ($c >= 0x6e5 && $c <= 0x6e6) ||
                ($c >= 0x6ee && $c <= 0x6ef) ||
                ($c >= 0x6fa && $c <= 0x70d) ||
                $c === 0x710 ||
                ($c >= 0x712 && $c <= 0x72f) ||
                ($c >= 0x74d && $c <= 0x7a5) ||
                $c === 0x7b1 ||
                ($c >= 0x7c0 && $c <= 0x7ea) ||
                ($c >= 0x7f4 && $c <= 0x7f5) ||
                $c === 0x7fa ||
                ($c >= 0x800 && $c <= 0x815) ||
                $c === 0x81a ||
                $c === 0x824 ||
                $c === 0x828 ||
                ($c >= 0x830 && $c <= 0x83e) ||
                ($c >= 0x840 && $c <= 0x858) ||
                $c === 0x85e
            ) {
                return 'RTL';
            }
        } elseif ($c === 0x200f) {
            // single code point between the two blocks
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            // upper block of right-to-left code points (0xfb1d - 0x10b7f)
            if ($c === 0xfb1d ||
                ($c >= 0xfb1f && $c <= 0xfb28) ||
                ($c >= 0xfb2a && $c <= 0xfb36) ||
                ($c >= 0xfb38 && $c <= 0xfb3c) ||
                $c === 0xfb3e ||
                ($c >= 0xfb40 && $c <= 0xfb41) ||
                ($c >= 0xfb43 && $c <= 0xfb44) ||
                ($c >= 0xfb46 && $c <= 0xfbc1) ||
                ($c >= 0xfbd3 && $c <= 0xfd3d) ||
                ($c >= 0xfd50 && $c <= 0xfd8f) ||
                ($c >= 0xfd92 && $c <= 0xfdc7) ||
                ($c >= 0xfdf0 && $c <= 0xfdfc) ||
                ($c >= 0xfe70 && $c <= 0xfe74) ||
                ($c >= 0xfe76 && $c <= 0xfefc) ||
                ($c >= 0x10800 && $c <= 0x10805) ||
                $c === 0x10808 ||
                ($c >= 0x1080a && $c <= 0x10835) ||
                ($c >= 0x10837 && $c <= 0x10838) ||
                $c === 0x1083c ||
                ($c >= 0x1083f && $c <= 0x10855) ||
                ($c >= 0x10857 && $c <= 0x1085f) ||
                ($c >= 0x10900 && $c <= 0x1091b) ||
                ($c >= 0x10920 && $c <= 0x10939) ||
                $c === 0x1093f ||
                $c === 0x10a00 ||
                ($c >= 0x10a10 && $c <= 0x10a13) ||
                ($c >= 0x10a15 && $c <= 0x10a17) ||
                ($c >= 0x10a19 && $c <= 0x10a33) ||
                ($c >= 0x10a40 && $c <= 0x10a47) ||
                ($c >= 0x10a50 && $c <= 0x10a58) ||
                ($c >= 0x10a60 && $c <= 0x10a7f) ||
                ($c >= 0x10b00 && $c <= 0x10b35) ||
                ($c >= 0x10b40 && $c <= 0x10b55) ||
                ($c >= 0x10b58 && $c <= 0x10b72) ||
                ($c >= 0x10b78 && $c <= 0x10b7f)
            ) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2517
2518
    /**
     * Check for php-support.
     *
     * Without a key the whole support-"array" is returned as it is. With a
     * key the transliterator list is loaded on first use, stored under
     * "intl__transliterator_list_ids", and the entry for the key is looked up.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // lazy-load the transliterator list only once
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2544
2545
    /**
     * Detect the file-type via the first two bytes of the given string.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        /** @var false|string $magic_bytes - needed for PhpStan (stubs error) */
        $magic_bytes = \substr($str, 0, 2);
        if ($magic_bytes === false || \strlen($magic_bytes) !== 2) {
            return $fallback;
        }

        /** @var array|false $byte_values - needed for PhpStan (stubs error) */
        $byte_values = \unpack('C2chars', $magic_bytes);
        if ($byte_values === false) {
            return $fallback;
        }

        // the decimal values of both bytes are glued together, e.g. 0xFF 0xD8 => "255" . "216" => 255216
        /** @noinspection OffsetOperationsInspection */
        $type_code = (int) ($byte_values['chars1'] . $byte_values['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2626
2627
    /**
     * Build a random string out of the given characters.
     *
     * Every character is picked via "random_int()", with "mt_rand()" as
     * fallback if "random_int()" throws an exception.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>An empty string, if $possible_chars contains no characters.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        $result = '';
        $picked = 0;

        if ($encoding === 'UTF-8') {
            // fast path: call the "mb_" functions directly
            $pool_size = (int) \mb_strlen($possible_chars);
            if ($pool_size === 0) {
                return '';
            }

            while ($picked < $length) {
                try {
                    $index = \random_int(0, $pool_size - 1);
                } catch (\Exception $e) {
                    /** @noinspection RandomApiMigrationInspection */
                    $index = \mt_rand(0, $pool_size - 1);
                }

                $random_char = \mb_substr($possible_chars, $index, 1);
                if ($random_char !== false) {
                    $result .= $random_char;
                    ++$picked;
                }
            }

            return $result;
        }

        // every other encoding goes through the wrappers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $pool_size = (int) self::strlen($possible_chars, $encoding);
        if ($pool_size === 0) {
            return '';
        }

        while ($picked < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $random_char = self::substr($possible_chars, $index, 1, $encoding);
            if ($random_char !== false) {
                $result .= $random_char;
                ++$picked;
            }
        }

        return $result;
    }
2691
2692
    /**
     * Build a unique string out of a random number, the session-id, the
     * remote- and server-address (if available) and the extra entropy.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_number = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_number = \mt_rand(0, \mt_getrandmax());
        }

        $remote_addr = $_SERVER['REMOTE_ADDR'] ?? '';
        $server_addr = $_SERVER['SERVER_ADDR'] ?? '';

        // this seed is used as prefix for "uniqid()" and again as suffix for the md5-hash
        $seed = $random_number . \session_id() . $remote_addr . $server_addr . $extra_entropy;

        $unique_id = \uniqid($seed, true);

        if (!$use_md5) {
            return $unique_id;
        }

        return \md5($unique_id . $seed);
    }
2721
2722
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The unchanged result of "UTF8::string_has_bom()".</p>
     *
     * @see        UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2738
2739
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * Uses "mb_ereg_match()" if mbstring is available, otherwise
     * "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:lower:]]');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:lower:]]', $str);
    }
2758
2759
    /**
     * Returns true if the string contains whitespace, false otherwise.
     *
     * Uses "mb_ereg_match()" if mbstring is available, otherwise
     * "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:space:]]');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:space:]]', $str);
    }
2778
2779
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * Uses "mb_ereg_match()" if mbstring is available, otherwise
     * "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:upper:]]');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:upper:]]', $str);
    }
2798
2799
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * The value is parsed via "hexdec()" and the resulting integer is handed
     * over to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2817
2818
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * Accepted input: an optional "\u" or "U+" prefix, followed by 4 to 6
     * hexadecimal digits. Everything else results in false.
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // only real hex digits are allowed here: with "[a-zA-Z0-9]" an input like "U+zzzz"
        // passed the check and "intval()" silently returned 0 (or a partial value) instead of false
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2847
2848
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string   $str
     * @param int|null $flags
     * @param string   $encoding
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2869
2870
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * With mbstring the conversion is done via "mb_encode_numericentity()",
     * otherwise every character is converted via
     * "UTF8::single_chr_html_encode()".
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // 0x80 => only non-ASCII chars are converted, 0x00 => all chars are converted
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // Always pass the encoding: without it "mb_encode_numericentity()" falls back
            // to the global "mb_internal_encoding()", which is not necessarily the
            // encoding that was requested by the caller.
            //
            // NOTE(review): the convmap ends at 0xfffff, so code points above U+FFFFF
            //               are not converted - confirm that this is intended.
            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity(
                $str,
                [$start_code, 0xfffff, 0, 0xfffff],
                $encoding
            );
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2942
2943
    /**
2944
     * UTF-8 version of html_entity_decode()
2945
     *
2946
     * The reason we are not using html_entity_decode() by itself is because
2947
     * while it is not technically correct to leave out the semicolon
2948
     * at the end of an entity most browsers will still interpret the entity
2949
     * correctly. html_entity_decode() does not convert entities without
2950
     * semicolons, so we are left with our own little solution here. Bummer.
2951
     *
2952
     * Convert all HTML entities to their applicable characters.
2953
     *
2954
     * INFO: opposite to UTF8::html_encode()
2955
     *
2956
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2957
     *
2958
     * @see http://php.net/manual/en/function.html-entity-decode.php
2959
     *
2960
     * @param string   $str      <p>
2961
     *                           The input string.
2962
     *                           </p>
2963
     * @param int|null $flags    [optional] <p>
2964
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2965
     *                           and which document type to use. If null is passed, ENT_QUOTES | ENT_HTML5 is used.
2966
     *                           <table>
2967
     *                           Available <i>flags</i> constants
2968
     *                           <tr valign="top">
2969
     *                           <td>Constant Name</td>
2970
     *                           <td>Description</td>
2971
     *                           </tr>
2972
     *                           <tr valign="top">
2973
     *                           <td><b>ENT_COMPAT</b></td>
2974
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2975
     *                           </tr>
2976
     *                           <tr valign="top">
2977
     *                           <td><b>ENT_QUOTES</b></td>
2978
     *                           <td>Will convert both double and single quotes.</td>
2979
     *                           </tr>
2980
     *                           <tr valign="top">
2981
     *                           <td><b>ENT_NOQUOTES</b></td>
2982
     *                           <td>Will leave both double and single quotes unconverted.</td>
2983
     *                           </tr>
2984
     *                           <tr valign="top">
2985
     *                           <td><b>ENT_HTML401</b></td>
2986
     *                           <td>
2987
     *                           Handle code as HTML 4.01.
2988
     *                           </td>
2989
     *                           </tr>
2990
     *                           <tr valign="top">
2991
     *                           <td><b>ENT_XML1</b></td>
2992
     *                           <td>
2993
     *                           Handle code as XML 1.
2994
     *                           </td>
2995
     *                           </tr>
2996
     *                           <tr valign="top">
2997
     *                           <td><b>ENT_XHTML</b></td>
2998
     *                           <td>
2999
     *                           Handle code as XHTML.
3000
     *                           </td>
3001
     *                           </tr>
3002
     *                           <tr valign="top">
3003
     *                           <td><b>ENT_HTML5</b></td>
3004
     *                           <td>
3005
     *                           Handle code as HTML 5.
3006
     *                           </td>
3007
     *                           </tr>
3008
     *                           </table>
3009
     *                           </p>
3010
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3011
     *
3012
     * @psalm-pure
3013
     *
3014
     * @return string the decoded string
3015
     */
3016 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // shorter than 4 bytes => there is no room for an entity (examples: &; || &x;)
        if (!isset($str[3])) {
            return $str;
        }

        // no "&" => nothing to decode
        if (\strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again, until one more pass does not change the string anymore
        // (e.g. for double encoded entities)
        do {
            $before_pass = $str;

            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // add the missing ";" to numeric entities, so that they can be decoded
                // (decode also numeric & UTF16 two byte entities)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($before_pass !== $str);

        return $str;
    }
3076
3077
    /**
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
     *
     * The flags "ENT_QUOTES | ENT_SUBSTITUTE" are always used.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3095
3096
    /**
     * Remove empty html-tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * INFO: an opening tag, followed only by optional whitespace and a
     *       closing tag, is removed; the tag names are not compared.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        return (string) $stripped;
    }
3115
3116
    /**
3117
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3118
     *
3119
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3120
     *
3121
     * @see http://php.net/manual/en/function.htmlentities.php
3122
     *
3123
     * @param string $str           <p>
3124
     *                              The input string.
3125
     *                              </p>
3126
     * @param int    $flags         [optional] <p>
3127
     *                              A bitmask of one or more of the following flags, which specify how to handle
3128
     *                              quotes, invalid code unit sequences and the used document type. The default is
3129
     *                              ENT_COMPAT | ENT_HTML401.
3130
     *                              <table>
3131
     *                              Available <i>flags</i> constants
3132
     *                              <tr valign="top">
3133
     *                              <td>Constant Name</td>
3134
     *                              <td>Description</td>
3135
     *                              </tr>
3136
     *                              <tr valign="top">
3137
     *                              <td><b>ENT_COMPAT</b></td>
3138
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3139
     *                              </tr>
3140
     *                              <tr valign="top">
3141
     *                              <td><b>ENT_QUOTES</b></td>
3142
     *                              <td>Will convert both double and single quotes.</td>
3143
     *                              </tr>
3144
     *                              <tr valign="top">
3145
     *                              <td><b>ENT_NOQUOTES</b></td>
3146
     *                              <td>Will leave both double and single quotes unconverted.</td>
3147
     *                              </tr>
3148
     *                              <tr valign="top">
3149
     *                              <td><b>ENT_IGNORE</b></td>
3150
     *                              <td>
3151
     *                              Silently discard invalid code unit sequences instead of returning
3152
     *                              an empty string. Using this flag is discouraged as it
3153
     *                              may have security implications.
3154
     *                              </td>
3155
     *                              </tr>
3156
     *                              <tr valign="top">
3157
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3158
     *                              <td>
3159
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3160
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3161
     *                              string.
3162
     *                              </td>
3163
     *                              </tr>
3164
     *                              <tr valign="top">
3165
     *                              <td><b>ENT_DISALLOWED</b></td>
3166
     *                              <td>
3167
     *                              Replace invalid code points for the given document type with a
3168
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3169
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3170
     *                              instance, to ensure the well-formedness of XML documents with
3171
     *                              embedded external content.
3172
     *                              </td>
3173
     *                              </tr>
3174
     *                              <tr valign="top">
3175
     *                              <td><b>ENT_HTML401</b></td>
3176
     *                              <td>
3177
     *                              Handle code as HTML 4.01.
3178
     *                              </td>
3179
     *                              </tr>
3180
     *                              <tr valign="top">
3181
     *                              <td><b>ENT_XML1</b></td>
3182
     *                              <td>
3183
     *                              Handle code as XML 1.
3184
     *                              </td>
3185
     *                              </tr>
3186
     *                              <tr valign="top">
3187
     *                              <td><b>ENT_XHTML</b></td>
3188
     *                              <td>
3189
     *                              Handle code as XHTML.
3190
     *                              </td>
3191
     *                              </tr>
3192
     *                              <tr valign="top">
3193
     *                              <td><b>ENT_HTML5</b></td>
3194
     *                              <td>
3195
     *                              Handle code as HTML 5.
3196
     *                              </td>
3197
     *                              </tr>
3198
     *                              </table>
3199
     *                              </p>
3200
     * @param string $encoding      [optional] <p>
3201
     *                              Like <b>htmlspecialchars</b>,
3202
     *                              <b>htmlentities</b> takes an optional third argument
3203
     *                              <i>encoding</i> which defines encoding used in
3204
     *                              conversion.
3205
     *                              Although this argument is technically optional, you are highly
3206
     *                              encouraged to specify the correct value for your code.
3207
     *                              </p>
3208
     * @param bool   $double_encode [optional] <p>
3209
     *                              When <i>double_encode</i> is turned off PHP will not
3210
     *                              encode existing html entities. The default is to convert everything.
3211
     *                              </p>
3212
     *
3213
     * @psalm-pure
3214
     *
3215
     * @return string
3216
     *                <p>
3217
     *                The encoded string.
3218
     *                <br><br>
3219
     *                If the input <i>string</i> contains an invalid code unit
3220
     *                sequence within the given <i>encoding</i> an empty string
3221
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3222
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3223
     *                </p>
3224
     */
3225 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; any other name is run
        // through normalize_encoding() first.
        $is_accepted_as_is = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_accepted_as_is) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // The native htmlentities() leaves backslashes untouched, so each
        // backslash is replaced by its numeric HTML entity here.
        //
        // Idea taken from:
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
3254
3255
    /**
3256
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3257
     *
3258
     * INFO: Take a look at "UTF8::htmlentities()"
3259
     *
3260
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3261
     *
3262
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3263
     *
3264
     * @param string $str           <p>
3265
     *                              The string being converted.
3266
     *                              </p>
3267
     * @param int    $flags         [optional] <p>
3268
     *                              A bitmask of one or more of the following flags, which specify how to handle
3269
     *                              quotes, invalid code unit sequences and the used document type. The default is
3270
     *                              ENT_COMPAT | ENT_HTML401.
3271
     *                              <table>
3272
     *                              Available <i>flags</i> constants
3273
     *                              <tr valign="top">
3274
     *                              <td>Constant Name</td>
3275
     *                              <td>Description</td>
3276
     *                              </tr>
3277
     *                              <tr valign="top">
3278
     *                              <td><b>ENT_COMPAT</b></td>
3279
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3280
     *                              </tr>
3281
     *                              <tr valign="top">
3282
     *                              <td><b>ENT_QUOTES</b></td>
3283
     *                              <td>Will convert both double and single quotes.</td>
3284
     *                              </tr>
3285
     *                              <tr valign="top">
3286
     *                              <td><b>ENT_NOQUOTES</b></td>
3287
     *                              <td>Will leave both double and single quotes unconverted.</td>
3288
     *                              </tr>
3289
     *                              <tr valign="top">
3290
     *                              <td><b>ENT_IGNORE</b></td>
3291
     *                              <td>
3292
     *                              Silently discard invalid code unit sequences instead of returning
3293
     *                              an empty string. Using this flag is discouraged as it
3294
     *                              may have security implications.
3295
     *                              </td>
3296
     *                              </tr>
3297
     *                              <tr valign="top">
3298
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3299
     *                              <td>
3300
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3301
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3302
     *                              string.
3303
     *                              </td>
3304
     *                              </tr>
3305
     *                              <tr valign="top">
3306
     *                              <td><b>ENT_DISALLOWED</b></td>
3307
     *                              <td>
3308
     *                              Replace invalid code points for the given document type with a
3309
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3310
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3311
     *                              instance, to ensure the well-formedness of XML documents with
3312
     *                              embedded external content.
3313
     *                              </td>
3314
     *                              </tr>
3315
     *                              <tr valign="top">
3316
     *                              <td><b>ENT_HTML401</b></td>
3317
     *                              <td>
3318
     *                              Handle code as HTML 4.01.
3319
     *                              </td>
3320
     *                              </tr>
3321
     *                              <tr valign="top">
3322
     *                              <td><b>ENT_XML1</b></td>
3323
     *                              <td>
3324
     *                              Handle code as XML 1.
3325
     *                              </td>
3326
     *                              </tr>
3327
     *                              <tr valign="top">
3328
     *                              <td><b>ENT_XHTML</b></td>
3329
     *                              <td>
3330
     *                              Handle code as XHTML.
3331
     *                              </td>
3332
     *                              </tr>
3333
     *                              <tr valign="top">
3334
     *                              <td><b>ENT_HTML5</b></td>
3335
     *                              <td>
3336
     *                              Handle code as HTML 5.
3337
     *                              </td>
3338
     *                              </tr>
3339
     *                              </table>
3340
     *                              </p>
3341
     * @param string $encoding      [optional] <p>
3342
     *                              Defines encoding used in conversion.
3343
     *                              </p>
3344
     *                              <p>
3345
     *                              For the purposes of this function, the encodings
3346
     *                              ISO-8859-1, ISO-8859-15,
3347
     *                              UTF-8, cp866,
3348
     *                              cp1251, cp1252, and
3349
     *                              KOI8-R are effectively equivalent, provided the
3350
     *                              <i>string</i> itself is valid for the encoding, as
3351
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3352
     *                              the same positions in all of these encodings.
3353
     *                              </p>
3354
     * @param bool   $double_encode [optional] <p>
3355
     *                              When <i>double_encode</i> is turned off PHP will not
3356
     *                              encode existing html entities, the default is to convert everything.
3357
     *                              </p>
3358
     *
3359
     * @psalm-pure
3360
     *
3361
     * @return string the converted string.
3362
     *                </p>
3363
     *                <p>
3364
     *                If the input <i>string</i> contains an invalid code unit
3365
     *                sequence within the given <i>encoding</i> an empty string
3366
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3367
     *                <b>ENT_SUBSTITUTE</b> flags are set
3368
     */
3369 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; any other name is run
        // through normalize_encoding() first.
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3386
3387
    /**
3388
     * Checks whether iconv is available on the server.
3389
     *
3390
     * @psalm-pure
3391
     *
3392
     * @return bool
3393
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3394
     *
3395
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3396
     */
3397
    public static function iconv_loaded(): bool
    {
        // Availability is decided purely by the "iconv" extension being loaded.
        $has_iconv = \extension_loaded('iconv');

        return $has_iconv;
    }
3401
3402
    /**
3403
     * alias for "UTF8::decimal_to_chr()"
3404
     *
3405
     * @param int|string $int
3406
     *
3407
     * @phpstan-param int|numeric-string $int
3408
     *
3409
     * @psalm-pure
3410
     *
3411
     * @return string
3412
     *
3413
     * @see        UTF8::decimal_to_chr()
3414
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
3415
     */
3416 4
    public static function int_to_chr($int): string
    {
        // Deprecated alias: the argument is forwarded unchanged.
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3420
3421
    /**
3422
     * Converts Integer to hexadecimal U+xxxx code point representation.
3423
     *
3424
     * INFO: opposite to UTF8::hex_to_int()
3425
     *
3426
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3427
     *
3428
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3429
     * @param string $prefix [optional]
3430
     *
3431
     * @psalm-pure
3432
     *
3433
     * @return string the prefixed hexadecimal code point, zero-padded to at least four digits
3434
     */
3435 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros up to four hex digits; longer values are kept as-is.
        $padded_hex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $padded_hex;
    }
3443
3444
    /**
3445
     * Checks whether intl-char is available on the server.
3446
     *
3447
     * @psalm-pure
3448
     *
3449
     * @return bool
3450
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3451
     *
3452
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3453
     */
3454
    public static function intlChar_loaded(): bool
    {
        // Availability is decided by the "IntlChar" class being defined.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3458
3459
    /**
3460
     * Checks whether intl is available on the server.
3461
     *
3462
     * @psalm-pure
3463
     *
3464
     * @return bool
3465
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3466
     *
3467
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3468
     */
3469 5
    public static function intl_loaded(): bool
    {
        // Availability is decided purely by the "intl" extension being loaded.
        $has_intl = \extension_loaded('intl');

        return $has_intl;
    }
3473
3474
    /**
3475
     * alias for "UTF8::is_ascii()"
3476
     *
3477
     * @param string $str
3478
     *
3479
     * @psalm-pure
3480
     *
3481
     * @return bool
3482
     *
3483
     * @see        UTF8::is_ascii()
3484
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
3485
     */
3486 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: delegates straight to the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3490
3491
    /**
3492
     * alias for "UTF8::is_base64()"
3493
     *
3494
     * @param string $str
3495
     *
3496
     * @psalm-pure
3497
     *
3498
     * @return bool
3499
     *
3500
     * @see        UTF8::is_base64()
3501
     * @deprecated <p>please use "UTF8::is_base64()"</p>
3502
     */
3503 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: the argument is forwarded unchanged.
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3507
3508
    /**
3509
     * alias for "UTF8::is_binary()"
3510
     *
3511
     * @param int|string $str
3512
     * @param bool       $strict
3513
     *
3514
     * @psalm-pure
3515
     *
3516
     * @return bool
3517
     *
3518
     * @see        UTF8::is_binary()
3519
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3520
     */
3521 4
    public static function isBinary($str, bool $strict = false): bool
    {
        // Deprecated alias: both arguments are forwarded unchanged.
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3525
3526
    /**
3527
     * alias for "UTF8::is_bom()"
3528
     *
3529
     * @param string $utf8_chr
3530
     *
3531
     * @psalm-pure
3532
     *
3533
     * @return bool
3534
     *
3535
     * @see        UTF8::is_bom()
3536
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3537
     */
3538 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: the argument is forwarded unchanged.
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3542
3543
    /**
3544
     * alias for "UTF8::is_html()"
3545
     *
3546
     * @param string $str
3547
     *
3548
     * @psalm-pure
3549
     *
3550
     * @return bool
3551
     *
3552
     * @see        UTF8::is_html()
3553
     * @deprecated <p>please use "UTF8::is_html()"</p>
3554
     */
3555 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: the argument is forwarded unchanged.
        $is_html = self::is_html($str);

        return $is_html;
    }
3559
3560
    /**
3561
     * alias for "UTF8::is_json()"
3562
     *
3563
     * @param string $str
3564
     *
3565
     * @return bool
3566
     *
3567
     * @see        UTF8::is_json()
3568
     * @deprecated <p>please use "UTF8::is_json()"</p>
3569
     */
3570 1
    public static function isJson(string $str): bool
    {
        // Deprecated alias: the argument is forwarded unchanged.
        $is_json = self::is_json($str);

        return $is_json;
    }
3574
3575
    /**
3576
     * alias for "UTF8::is_utf16()"
3577
     *
3578
     * @param string $str
3579
     *
3580
     * @psalm-pure
3581
     *
3582
     * @return false|int
3583
     *                   <strong>false</strong> if it's not UTF-16,<br>
3584
     *                   <strong>1</strong> for UTF-16LE,<br>
3585
     *                   <strong>2</strong> for UTF-16BE
3586
     *
3587
     * @see        UTF8::is_utf16()
3588
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
3589
     */
3590 2
    public static function isUtf16($str)
    {
        // Deprecated alias: the argument and the result are passed through unchanged.
        $result = self::is_utf16($str);

        return $result;
    }
3594
3595
    /**
3596
     * alias for "UTF8::is_utf32()"
3597
     *
3598
     * @param string $str
3599
     *
3600
     * @psalm-pure
3601
     *
3602
     * @return false|int
3603
     *                   <strong>false</strong> if it's not UTF-32,
3604
     *                   <strong>1</strong> for UTF-32LE,
3605
     *                   <strong>2</strong> for UTF-32BE
3606
     *
3607
     * @see        UTF8::is_utf32()
3608
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
3609
     */
3610 2
    public static function isUtf32($str)
    {
        // Deprecated alias: the argument and the result are passed through unchanged.
        $result = self::is_utf32($str);

        return $result;
    }
3614
3615
    /**
3616
     * alias for "UTF8::is_utf8()"
3617
     *
3618
     * @param string $str
3619
     * @param bool   $strict
3620
     *
3621
     * @psalm-pure
3622
     *
3623
     * @return bool
3624
     *
3625
     * @see        UTF8::is_utf8()
3626
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3627
     */
3628 17
    public static function isUtf8($str, bool $strict = false): bool
    {
        // Deprecated alias: both arguments are forwarded unchanged.
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3632
3633
    /**
3634
     * Returns true if the string contains only alphabetic chars, false otherwise.
3635
     *
3636
     * @param string $str <p>The input string.</p>
3637
     *
3638
     * @psalm-pure
3639
     *
3640
     * @return bool
3641
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3642
     */
3643 10
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring support, fall back to the internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3652
3653
    /**
3654
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3655
     *
3656
     * @param string $str <p>The input string.</p>
3657
     *
3658
     * @psalm-pure
3659
     *
3660
     * @return bool
3661
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3662
     */
3663 13
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring support, fall back to the internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3672
3673
    /**
3674
     * Returns true if the string contains only punctuation chars, false otherwise.
3675
     *
3676
     * @param string $str <p>The input string.</p>
3677
     *
3678
     * @psalm-pure
3679
     *
3680
     * @return bool
3681
     *              <p>Whether or not $str contains only punctuation chars.</p>
3682
     */
3683 10
    public static function is_punctuation(string $str): bool
    {
        // The whole string must consist of [[:punct:]] characters
        // (an empty string satisfies the "*" quantifier as far as the pattern goes).
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3687
3688
    /**
3689
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3690
     *
3691
     * @param string $str                       <p>The input string.</p>
3692
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3693
     *
3694
     * @psalm-pure
3695
     *
3696
     * @return bool
3697
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3698
     */
3699 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string counts as printable when stripping invisible characters
        // leaves it exactly as it was.
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }
3703
3704
    /**
3705
     * Checks if a string is 7 bit ASCII.
3706
     *
3707
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3708
     *
3709
     * @param string $str <p>The string to check.</p>
3710
     *
3711
     * @psalm-pure
3712
     *
3713
     * @return bool
3714
     *              <p>
3715
     *              <strong>true</strong> if it is ASCII<br>
3716
     *              <strong>false</strong> otherwise
3717
     *              </p>
3718
     */
3719 8
    public static function is_ascii(string $str): bool
    {
        // The actual check lives in the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3723
3724
    /**
3725
     * Returns true if the string is base64 encoded, false otherwise.
3726
     *
3727
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3728
     *
3729
     * @param string|null $str                   <p>The input string.</p>
3730
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3731
     *
3732
     * @psalm-pure
3733
     *
3734
     * @return bool
3735
     *              <p>Whether or not $str is base64 encoded.</p>
3736
     */
3737 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // An empty string only passes when the caller explicitly allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Anything that is not a string (e.g. null) can never be base64.
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: re-encoding the decoded bytes must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3755
3756
    /**
3757
     * Check if the input is binary... (this looks like a hack).
3758
     *
3759
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3760
     *
3761
     * @param int|string $input
3762
     * @param bool       $strict
3763
     *
3764
     * @psalm-pure
3765
     *
3766
     * @return bool
3767
     */
3768 40
    public static function is_binary($input, bool $strict = false): bool
    {
        $bytes = (string) $input;

        // An empty input is never reported as binary.
        if ($bytes === '') {
            return false;
        }

        // A string made only of "0" and "1" characters is reported as binary.
        if (\preg_match('~^[01]+$~', $bytes)) {
            return true;
        }

        $file_type = self::get_file_type($bytes);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than a quarter of the bytes being NUL is reported as binary.
        $byte_count = \strlen($bytes);
        $null_count = \substr_count($bytes, "\x0", 0, $byte_count);
        if (($null_count / $byte_count) > 0.25) {
            return true;
        }

        // The fileinfo-based detection below only runs in strict mode.
        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($bytes);

        return $detected_encoding === 'binary';
    }
3807
3808
    /**
3809
     * Check if the file is binary.
3810
     *
3811
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3812
     *
3813
     * @param string $file
3814
     *
3815
     * @return bool
3816
     */
3817 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // A file that could not be opened is reported as "not binary".
        if (!\is_resource($handle)) {
            return false;
        }

        // Only the first 512 bytes are inspected.
        $chunk = \fread($handle, 512);
        \fclose($handle);

        // Nothing readable (empty file or read failure): "not binary".
        if ($chunk === '' || $chunk === false) {
            return false;
        }

        return self::is_binary($chunk, true);
    }
3834
3835
    /**
3836
     * Returns true if the string contains only whitespace chars, false otherwise.
3837
     *
3838
     * @param string $str <p>The input string.</p>
3839
     *
3840
     * @psalm-pure
3841
     *
3842
     * @return bool
3843
     *              <p>Whether or not $str contains only whitespace characters.</p>
3844
     */
3845 15
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // Without mbstring support, fall back to the internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3854
3855
    /**
3856
     * Checks if the given string is equal to any "Byte Order Mark".
3857
     *
3858
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3859
     *
3860
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3861
     *
3862
     * @param string $str <p>The input string.</p>
3863
     *
3864
     * @psalm-pure
3865
     *
3866
     * @return bool
3867
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3868
     */
3869 2
    public static function is_bom($str): bool
    {
        // The BOM byte sequences are the keys of self::$BOM; the values are
        // not needed for this check.
        //
        // The previous implementation iterated the static array by reference
        // ("=> &$value") without ever writing to it. That takes a
        // write-reference to shared static state for no benefit, so a
        // read-only strict lookup over the keys is used instead. The strict
        // flag keeps the original "===" comparison semantics.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3880
3881
    /**
3882
     * Determine whether the string is considered to be empty.
3883
     *
3884
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3885
     * empty() does not generate a warning if the variable does not exist.
3886
     *
3887
     * @param array|float|int|string $str
3888
     *
3889
     * @psalm-pure
3890
     *
3891
     * @return bool
3892
     *              <p>Whether or not $str is empty().</p>
3893
     */
3894 1
    public static function is_empty($str): bool
    {
        // $str is always defined here (it is a parameter), so empty($str)
        // reduces to a plain falsiness check.
        return !$str;
    }
3898
3899
    /**
3900
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3901
     *
3902
     * @param string $str <p>The input string.</p>
3903
     *
3904
     * @psalm-pure
3905
     *
3906
     * @return bool
3907
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3908
     */
3909 13
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring support, fall back to the internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3918
3919
    /**
3920
     * Check if the string contains any HTML tags.
3921
     *
3922
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3923
     *
3924
     * @param string $str <p>The input string.</p>
3925
     *
3926
     * @psalm-pure
3927
     *
3928
     * @return bool
3929
     *              <p>Whether or not $str contains html elements.</p>
3930
     */
3931 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Matches an opening or closing tag, optionally with attributes and
        // an optional self-closing slash.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        $found = [];
        \preg_match($tag_pattern, $encoded, $found);

        // A non-empty match array means at least one tag was found.
        return $found !== [];
    }
3946
3947
    /**
3948
     * Check if $url is a correct URL.
3949
     *
3950
     * @param string $url
3951
     * @param bool   $disallow_localhost
3952
     *
3953
     * @psalm-pure
3954
     *
3955
     * @return bool
3956
     */
3957 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    // Un-bracketed IPv6 loopback, kept for backward compatibility.
                    'http://::1',
                    'https://::1',
                    // RFC 3986 writes IPv6 literals in square brackets, so this is
                    // the form the loopback address takes in an actual URL. Without
                    // these entries "http://[::1]/" was not caught by this check.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // Reject any ".localhost" occurrence after the scheme as well.
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything the pattern above did not accept is left to the native validator.
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
4002
4003
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>If true, only input that decodes to an
     *                                                       array or an object counts as JSON.</p>
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a decoded null is only acceptable when the input itself spells "null"
        $input_is_null_literal = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$input_is_null_literal) {
            return false;
        }

        $decoded_is_container = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid && !$decoded_is_container) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4043
4044
    /**
     * Returns true if the string contains only lower case chars, false
     * otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        // without mbstring, fall back to the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:lower:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:lower:]]*$', $str);
    }
4061
4062
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * INFO: The check works by actually unserializing the input. Class instantiation is
     * disabled ("allowed_classes" => false), so serialized objects are still detected, but
     * no class from the input is instantiated while checking.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized boolean false: unserialize() returns false for it,
        // which would be indistinguishable from the failure return value
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return $str === 'b:0;'
               ||
               @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4084
4085
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        // without mbstring, fall back to the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:upper:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:upper:]]*$', $str);
    }
4105
4106
    /**
     * Check if the string is UTF-16.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * For each byte order the input is converted to UTF-8 and back again; if that round trip
     * is stable, the decoded chars are compared with the chars of the input and every hit
     * counts for that byte order. The byte order with more hits wins, a tie means "not UTF-16".
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>If true, non-binary input is rejected right away.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of hits per byte order
        $maybe = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only trust this byte order if the conversion survives a round trip
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the char-list of the input is computed lazily and only once
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // iterate the keys only: the result of a function call cannot be iterated by reference
            //
            // NOTE(review): in_array() searches the *values* of $str_chars - confirm that this
            // is the intended lookup for the array that count_chars() returns.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$maybe[$encoding];
                }
            }
        }

        if ($maybe['UTF-16BE'] !== $maybe['UTF-16LE']) {
            return $maybe['UTF-16LE'] > $maybe['UTF-16BE'] ? 1 : 2;
        }

        return false;
    }
4199
4200
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * For each byte order the input is converted to UTF-8 and back again; if that round trip
     * is stable, the decoded chars are compared with the chars of the input and every hit
     * counts for that byte order. The byte order with more hits wins, a tie means "not UTF-32".
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>If true, non-binary input is rejected right away.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of hits per byte order
        $maybe = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only trust this byte order if the conversion survives a round trip
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the char-list of the input is computed lazily and only once
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // iterate the keys only: the result of a function call cannot be iterated by reference
            //
            // NOTE(review): in_array() searches the *values* of $str_chars - confirm that this
            // is the intended lookup for the array that count_chars() returns.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$maybe[$encoding];
                }
            }
        }

        if ($maybe['UTF-32BE'] !== $maybe['UTF-32LE']) {
            return $maybe['UTF-32LE'] > $maybe['UTF-32BE'] ? 1 : 2;
        }

        return false;
    }
4293
4294
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked; for an array every element is checked
     *                                         (an empty array is reported as valid).</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // plain by-value iteration: the elements are only read, never modified
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4324
4325
    /**
     * Decodes a JSON string.
     *
     * The input is passed through UTF8::filter() first and then handed to PHP's native json_decode().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        Forwarded as second argument of json_decode(): when <b>TRUE</b>, returned
     *                        objects will be converted into associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, forwarded to json_decode().
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return mixed
     *               <p>The result of json_decode(): the value encoded in <i>json</i> in appropriate PHP type,
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4379
4380
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through UTF8::filter() first and then handed to PHP's native json_encode().
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>),
     *                       forwarded to json_encode().
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4433
4434
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is done by testing for the existence of the "json_decode" function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4448
4449
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // every other encoding goes through the encoding-aware helpers of this class
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $first_char_lower = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $first_char_lower . $remainder;
        }

        $remainder = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // the plain "mb_" function is enough as long as no special handling was requested
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $remainder;
        }

        $first_char_lower = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $first_char_lower . $remainder;
    }
4511
4512
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4543
4544
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via UTF8::str_to_words(), the first char of every non-empty part is
     * lowercased via UTF8::lcfirst() (unless the part is listed in $exceptions) and the parts
     * are concatenated again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare against '' explicitly: a truthiness check would also discard the string "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only - "0" is a regular word and must be kept
            if ($word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // excluded words are taken over unchanged
                $words_str .= $word;
            } else {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }
        }

        return $words_str;
    }
4597
4598
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4629
4630
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; if null, leading whitespace ("\s") is
     *                           stripped</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: here the chars are additionally quoted for the "/" delimiter
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4670
4671
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings (an array is
     *                                  joined into one string first).</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);
        if ($codepoints === []) {
            return null;
        }

        return self::chr((int) \max($codepoints));
    }
4697
4698
    /**
     * Determines the largest number of bytes used by any single
     * UTF-8 encoded character of the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte length of the given chars, 0 if no character sizes were found.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        return $byte_sizes === [] ? 0 : (int) \max($byte_sizes);
    }
4720
4721
    /**
     * Reports whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4735
4736
    /**
     * Returns the UTF-8 character with the lowest code point found in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings
     *                             (array items are concatenated before the lookup).</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null if no code point
     *                     could be extracted (e.g. empty input).</p>
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input);

        return $code_points === []
            ? null
            : self::chr((int) \min($code_points));
    }
4763
4764
    /**
     * Alias for "UTF8::normalize_encoding()": both arguments are passed through
     * unchanged and its result is returned as-is.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4781
4782
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order:
     * 1. an empty value, '0' or '1' yields $fallback,
     * 2. a handful of frequent names / aliases is answered directly,
     * 3. a per-request static cache is consulted,
     * 4. names contained in the known-encodings list are returned unchanged,
     * 5. otherwise the name is upper-cased and - reduced to its alphanumeric
     *    characters - looked up in an alias table; if no alias matches, the
     *    upper-cased name itself is returned.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // '' and '0' are the falsy strings; '1' (like '0') only shows up when a bool / int
        // was passed in non "strict_types" usage ...
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        // frequent inputs, answered without touching the cache or the encodings list
        $exact_matches = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($exact_matches[$encoding])) {
            return $exact_matches[$encoding];
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encodings
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $requested_name = $encoding;
        $upper_name = \strtoupper($encoding);
        // alias lookup key: the upper-cased name without separators like "-" or "_"
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upper_name);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown names fall back to their upper-cased form
        $normalized = $equivalences[$alias_key] ?? $upper_name;

        // cached under the name as it was passed in, so the next identical request is a plain lookup
        $STATIC_NORMALIZE_ENCODING_CACHE[$requested_name] = $normalized;

        return $normalized;
    }
4947
4948
    /**
     * Standardize line endings (unix-like by default).
     *
     * All three line-ending styles ("\r\n", "\r" and "\n") are replaced in a single pass,
     * so a replacer that itself contains "\r" or "\n" (e.g. "\r\n") is never replaced again.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. If an array is given, its 1st / 2nd / 3rd value replaces
     *                                  "\r\n" / "\r" / "\n"; missing values are treated as empty strings.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        $line_endings = ["\r\n", "\r", "\n"];

        if (\is_array($replacer)) {
            // positional mapping, independent of the array keys (like str_replace() does it)
            $replacements = \array_values($replacer);

            $pairs = [];
            foreach ($line_endings as $index => $line_ending) {
                $pairs[$line_ending] = (string) ($replacements[$index] ?? '');
            }
        } else {
            $pairs = \array_fill_keys($line_endings, (string) $replacer);
        }

        // strtr() tries the longest key first ("\r\n" before "\r" / "\n") and never re-scans
        // text it has already replaced; sequential str_replace() did re-scan it, which
        // turned "\r\n" into "\r\r\n\r\n" for a "\r\n" replacer.
        return \strtr($str, $pairs);
    }
4964
4965
    /**
     * Normalize some MS Word special characters.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_msword()".
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4981
4982
    /**
     * Normalize the whitespace.
     *
     * Thin wrapper: all arguments are forwarded to "ASCII::normalize_whitespace()".
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
5011
5012
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-request static cache, the preloaded "ord" data table,
     * "IntlChar::ord()" (if available) and finally a manual decoding of the first (up to 4) bytes.
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache key is built from the character as it was passed in (before any re-encoding)
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code_point = \IntlChar::ord($chr);
            // Deliberately a truthiness check: null (failure) and 0 both continue with the
            // byte-level fallback below, which yields 0 for a NUL byte as well.
            if ($intl_code_point) {
                return $CHAR_CACHE[$cache_key] = $intl_code_point;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (up to) four byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // 4-byte sequence
            $code_point = (($lead_byte - 0xF0) << 18)
                          + (($bytes[2] - 0x80) << 12)
                          + (($bytes[3] - 0x80) << 6)
                          + $bytes[4] - 0x80;
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // 3-byte sequence
            $code_point = (($lead_byte - 0xE0) << 12)
                          + (($bytes[2] - 0x80) << 6)
                          + $bytes[3] - 0x80;
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // 2-byte sequence
            $code_point = (($lead_byte - 0xC0) << 6)
                          + $bytes[2] - 0x80;
        } else {
            // single byte (or an incomplete multi-byte sequence): use the first byte as it is
            $code_point = $lead_byte;
        }

        return $CHAR_CACHE[$cache_key] = (int) $code_point;
    }
5106
5107
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        $is_parsed = \mb_parse_str($str, $result);

        // success requires both: mbstring did not fail and at least one value was extracted
        return $is_parsed !== false && $result !== [];
    }
5148
5149
    /**
     * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string;
     * warnings are suppressed, so a failing call simply results in false.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
5165
5166
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * How $var1 / $var2 are interpreted:
     * - with $use_ctype: both decimal-digit strings => decimal code points,
     *   else both hex-digit strings => hexadecimal code points,
     *   else each value is treated as a character (its code point is used),
     * - without $use_ctype: a numeric value is used as code point, anything else
     *   is treated as a character.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but "ext-ctype" is not installed
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The characters of the range; an empty array if a boundary is empty / "0"
     *                  or resolves to code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * The parameter is untyped, so the docblock type is not guaranteed at runtime.
             *
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype functions must only see strings: an int argument is interpreted as an
        // ASCII code (e.g. 71 is checked as "G") and is deprecated as of PHP 8.1.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        /** @noinspection PhpComposerExtensionStubsInspection */
        $is_digit = $use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string);
        /** @noinspection PhpComposerExtensionStubsInspection */
        $is_xdigit = !$is_digit && $use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string);

        if ($is_digit) {
            $start = (int) $var1;
        } elseif ($is_xdigit) {
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $code_point) {
            $array[] = (string) self::chr((int) $code_point, $encoding);
        }

        return $array;
    }
5260
5261
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a decoding pass
     *                             no longer changes the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One decoding pass always runs; further passes run only in multi-decode mode and
        // only as long as the previous pass still changed the string.
        do {
            $before_pass = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($decoded),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $decoded);

        return self::fix_simple_utf8($decoded);
    }
5321
5322
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and always gets the "u" (UTF-8) modifier,
     * followed by the given options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used. The value 'msr' is
     *                            mapped to 'ms'.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/' (also used if an
     *                            empty value is given)</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string; an empty string if the regex call fails.</p>
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5357
5358
    /**
     * Alias for "UTF8::remove_bom()": the string is passed through unchanged
     * and its result is returned as-is.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5374
5375
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked once, in table order, against the start of the
     * (possibly already shortened) string; each matching BOM is cut off.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);
        foreach (self::$BOM as $bom => $bom_size) {
            if (\strncmp($str, $bom, $bom_size) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_size, $remaining_bytes);
            if ($without_bom === false) {
                // nothing is left after the BOM
                return '';
            }

            $remaining_bytes -= (int) $bom_size;

            $str = (string) $without_bom;
        }

        return $str;
    }
5410
5411
    /**
     * Removes duplicate occurrences of a string in another string:
     * every run of directly repeated $what is collapsed into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String (or list of strings) to search for in the base string.
     *                              The strings are taken literally, also if they contain regex or
     *                              replacement meta characters like "$0".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        // The parameter is untyped: anything that is neither string nor array leaves $str untouched.
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($what)) {
            return $str;
        }

        foreach ($what as $item) {
            if ($item === '') {
                // an empty needle has no duplicates
                continue;
            }

            // The replacement is the back-reference "$1" (the captured, literal needle) and not the
            // needle itself: a needle like "$0" or "\2" would otherwise be interpreted as a
            // back-reference by preg_replace() and produce a wrong result.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5441
5442
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (strip all tags)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5459
5460
    /**
     * Remove all breaks [<br> | \r\n | \r | \n] from the string.
     *
     * A "\r\n" pair counts as one break, and only real "<br>" tags (in any letter case, with or
     * without attributes / a self-closing slash) are matched - not other tags starting with "br".
     *
     * EXAMPLE: <code>UTF8::remove_html_breaks("a<br />b\r\nc", ' '); // 'a b c'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Two defects of the former pattern "#/\r\n|\r|\n|<br.*/?>#isU" are fixed here:
        // - the stray "/" made the first alternative match only "/\r\n", so a slash in front of a
        //   line break was deleted and a plain "\r\n" was replaced twice ("\r" and "\n"),
        // - "<br.*>" also matched every other tag starting with "br".
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5475
5476
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Thin wrapper: all arguments are forwarded to "ASCII::remove_invisible_characters()".
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        $cleaned = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $cleaned;
    }
5513
5514
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix check is a byte-wise comparison at offset 0; an empty
     * $substring never matches, so $str is returned unchanged in that case.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would treat the prefix "0" as "no prefix".
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        // Fast path: native mbstring call for the default encoding.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, (int) \mb_strlen($substring));
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5555
5556
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix check is a byte-wise comparison of the tail of $str; an empty
     * $substring never matches, so $str is returned unchanged in that case.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would treat the suffix "0" as "no suffix".
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        // Fast path: native mbstring calls for the default encoding.
        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($str) - (int) \mb_strlen($substring);

            return (string) \mb_substr($str, 0, $keep_length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5594
5595
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Case-sensitive replacement uses the native str_replace(); the
     * case-insensitive variant is delegated to UTF8::str_ireplace().
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5620
5621
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * Case-sensitive replacement uses the native str_replace(); the
     * case-insensitive variant is delegated to UTF8::str_ireplace().
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5646
5647
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * When $process_invalid_utf8_chars is true, the string is re-encoded from UTF-8 to UTF-8
     * via mb_convert_encoding() while the mbstring substitute character is temporarily set to
     * the replacement; the previous substitute character is restored afterwards.
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // mb_substitute_character() takes the keyword "none" or a Unicode code point.
            // The native \ord() only returns the first BYTE, which is wrong for a multi-byte
            // replacement (e.g. "€"), so the UTF-8 aware self::ord() is used instead.
            // NOTE(review): assumes self::ord() returns the code point of the first character — confirm.
            $replacement_char_helper = $replacement_char === ''
                ? 'none'
                : self::ord($replacement_char, 'UTF-8');

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
            @\mb_substitute_character($replacement_char_helper);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the previously configured substitute character
            \mb_substitute_character($save);
        }

        // Finally replace every literal U+FFFD that is (still) part of the string.
        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5706
5707
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * With native mbstring support the trimming is done via mb_ereg_replace(),
     * otherwise via UTF8::regex_replace().
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            $pattern = '[\\s]+$';
        } else {
            // mb_ereg patterns have no delimiter, the fallback quotes "/" in addition.
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2.
            $pattern = "[{$quoted_chars}]+$";
        }

        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5748
5749
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the
     * internal support table; the block is echoed when $useEcho is true and
     * is returned in every case.
     *
     * @param bool $useEcho <p>Echo the generated HTML in addition to returning it. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';
        // Iterate by value: the entries are only read, a reference into the static array is not needed.
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5776
5777
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * An empty $char yields an empty string.
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // The ASCII check is only evaluated when the caller asked to keep ASCII chars.
        $return_unchanged = $keep_ascii_chars && ASCII::is_ascii($char);

        return $return_unchanged
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5810
5811
    /**
     * Replaces every run of $tab_length consecutive spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // One space-only string of the requested width is the search needle.
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5831
5832
    /**
     * alias for "UTF8::str_split()"
     *
     * All arguments are passed through unchanged.
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5854
5855
    /**
     * alias for "UTF8::str_starts_with()"
     *
     * Both arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5872
5873
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // 'UTF-8' and 'CP850' are used as given, every other name is normalized first.
        $is_ready_to_use_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_ready_to_use_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Lower-case the first character of the trimmed input and drop leading dashes / underscores.
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // The native mbstring functions are only used when no language / length handling is requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Remove separator runs and upper-case the character that follows them (if any).
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
                if (!isset($match[1])) {
                    return '';
                }

                if (!$use_mb_functions) {
                    return self::strtoupper($match[1], $encoding, false, $lang, $try_to_keep_the_string_length);
                }

                return $encoding === 'UTF-8'
                    ? \mb_strtoupper($match[1])
                    : \mb_strtoupper($match[1], $encoding);
            },
            $str
        );

        // Upper-case every run of numeric characters together with the character directly after it.
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($use_mb_functions, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length): string {
                if (!$use_mb_functions) {
                    return self::strtoupper($match[0], $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
                }

                return $encoding === 'UTF-8'
                    ? \mb_strtoupper($match[0])
                    : \mb_strtoupper($match[0], $encoding);
            },
            $str
        );
    }
5966
5967
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace-collapsed input is run through the capitalize helper
     * twice: first with ' ' and then with '-' as the delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $capitalized_by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalized_by_space, '-');
    }
5988
5989
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // Case-insensitive lookups always go through mbstring.
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // PHP 8 ships a native implementation, older versions fall back to strpos().
        if (\PHP_VERSION_ID >= 80000) {
            /** @phpstan-ignore-next-line - only for PHP8 */
            return \str_contains($haystack, $needle);
        }

        return \strpos($haystack, $needle) !== false;
    }
6019
6020
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle ('')
     * always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // Iterate by value: the needles are only read.
        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Compare against '' explicitly so that the needle "0" is searched for
            // instead of being rejected as "empty" by a truthiness check.
            if ($needle === '') {
                return false;
            }

            // Exactly one search per needle, chosen by the requested case handling.
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6059
6060
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack or an empty $needles list results in false;
     * empty needles ('') are skipped.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains $needle.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // Iterate by value: the needles are only read.
        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Compare against '' explicitly so that the needle "0" is searched for
            // instead of being skipped as "empty" by a truthiness check.
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6104
6105
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Implemented as UTF8::str_delimit() with '-' as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6121
6122
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // With native mbstring the mb_ereg functions are used, otherwise PCRE with the "u" modifier.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // Step 1: mark every upper-case letter that is not at a word boundary with a leading dash.
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // Step 2: lower-case, natively when neither language nor length handling nor a custom encoding is requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        $str = ($use_mb_functions && $encoding === 'UTF-8')
            ? \mb_strtolower($str)
            : self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        // Step 3: collapse every run of dashes, underscores and whitespace into the delimiter.
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6175
6176
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * The checks run in this order: binary (UTF-32, then UTF-16), ASCII, UTF-8,
     * "mb_detect_encoding()" and finally an "iconv()" round-trip per known encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true)) {
            // result code of the helper => encoding name
            $utf32_names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
            $utf32_result = self::is_utf32($str, false);
            if (\is_int($utf32_result) && isset($utf32_names[$utf32_result])) {
                return $utf32_names[$utf32_result];
            }

            $utf16_names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];
            $utf16_result = self::is_utf16($str, false);
            if (\is_int($utf16_result) && isset($utf16_names[$utf16_result])) {
                return $utf16_names[$utf16_result];
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $encoding_detecting_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $encoding_detecting_order, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
6306
6307
    /**
     * Deprecated alias: forwards both arguments to "UTF8::str_ends_with()"
     * and returns its result unchanged.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6324
6325
    /**
     * Tells whether $haystack ends with $needle (byte-wise, case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for. An empty needle always yields true.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // the empty needle is checked first, so ('', '') is true
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // before PHP 8 there is no native function: compare the trailing bytes by hand
        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
6357
6358
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive (plain byte comparison of the string's tail)
     * - an empty $substrings list yields false
     * - an empty-string entry never matches a non-empty $str, because the
     *   comparison is then made against the whole of $str
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // iterate by value: nothing is written back, so a reference is not needed
        foreach ($substrings as $substring) {
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        // also reached for an empty list
        return false;
    }
6385
6386
    /**
     * Makes sure the string starts with $substring: the input is returned as is
     * when it already begins with a non-empty $substring, otherwise $substring
     * is prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $already_prefixed = $substring !== '' && \strpos($str, $substring) === 0;

        // prepending an empty $substring is a no-op, so that case needs no extra branch
        return $already_prefixed ? $str : $substring . $str;
    }
6409
6410
    /**
     * Makes sure the string ends with $substring: the input is returned as is
     * when it already ends with it, otherwise $substring is appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // only a non-empty string can already carry a non-empty suffix
        if ($str !== '' && $substring !== '') {
            $tail = \substr($str, -\strlen($substring));
            if ($tail === $substring) {
                return $str;
            }
        }

        return $str . $substring;
    }
6434
6435
    /**
     * Turns an identifier-like string into a more readable one: every '_id' is
     * removed, every remaining underscore becomes a space, the result is
     * trimmed and then passed through "UTF8::ucfirst()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: drop '_id' first, then convert the underscores that are left
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
6461
6462
    /**
     * Deprecated alias: forwards both arguments to "UTF8::str_istarts_with()"
     * and returns its result unchanged.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
6479
6480
    /**
     * Deprecated alias: forwards both arguments to "UTF8::str_iends_with()"
     * and returns its result unchanged.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
6497
6498
    /**
     * Tells whether $haystack ends with $needle, ignoring case.
     *
     * The last strlen($needle) bytes of $haystack are compared with $needle
     * via "UTF8::strcasecmp()".
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for. An empty needle always yields true.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // with an empty operand the answer depends only on whether the needle is empty
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6525
6526
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive (each entry is checked with "UTF8::str_iends_with()")
     * - an empty $substrings list yields false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // iterate by value: nothing is written back, so a reference is not needed
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        // also reached for an empty list
        return false;
    }
6553
6554
    /**
     * Deprecated alias: forwards all arguments to "UTF8::stripos()" and
     * returns its result unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
6585
6586
    /**
     * Deprecated alias: forwards all arguments to "UTF8::strripos()" and
     * returns its result unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
6618
6619
    /**
     * Deprecated alias: forwards all arguments to "UTF8::strpos()" and
     * returns its result unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
6650
6651
    /**
     * Deprecated alias: forwards all arguments to "UTF8::strrpos()" and
     * returns its result unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
6683
6684
    /**
     * Inserts $substring into the string at the character position $index.
     *
     * When $index is greater than the length of the string, the string is
     * returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // any other charset goes through the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($length < $index) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // exact 'UTF-8': use the mbstring functions directly
        $length = (int) \mb_strlen($str);
        if ($length < $index) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
6725
6726
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search term is turned into a quoted, case-insensitive unicode regex
     * and applied via "preg_replace()". The replacement is inserted literally:
     * "$" and "\" are escaped so they are never read as regex back-references.
     * An empty search term is mapped to a pattern that can never match.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // build one regex per search term (no by-reference loop, so no dangling reference)
        $patterns = [];
        foreach ((array) $search as $needle) {
            $needle = (string) $needle;
            $patterns[] = $needle === ''
                ? '/^(?<=.)$/' // never matches: an empty needle must not replace anything
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // "preg_replace()" would expand "$1", "${1}" and "\1" inside the replacement,
        // "str_ireplace()" semantics require the text to be inserted as it is
        if (\is_array($replacement)) {
            $replacement = \array_map(
                static function ($single_replacement): string {
                    return \addcslashes((string) $single_replacement, '\\$');
                },
                $replacement
            );
        } else {
            $replacement = \addcslashes((string) $replacement, '\\$');
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6793
6794
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * The prefix comparison is done with "strncasecmp()". An empty $search
     * results in $replacement being appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // covers every empty-search combination, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_length = \strlen($search);
        $has_prefix = \strncasecmp($str, $search, $prefix_length) === 0;

        return $has_prefix ? $replacement . \substr($str, $prefix_length) : $str;
    }
6829
6830
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * The suffix comparison is done with "stripos()". An empty $search results
     * in $replacement being appended to $str. A $search that is longer than
     * $str can never match, so $str is returned unchanged in that case.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // covers every empty-search combination, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        // byte position at which the suffix would have to start
        $suffix_offset = \strlen($str) - \strlen($search);

        // a negative offset may lie outside of $str, which makes "stripos()"
        // throw a ValueError on PHP 8 (and emit a warning before that)
        if ($suffix_offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $suffix_offset) !== false) {
            return \substr($str, 0, $suffix_offset) . $replacement;
        }

        return $str;
    }
6864
6865
    /**
     * Tells whether $haystack starts with $needle, ignoring case.
     *
     * The check is true when "UTF8::stripos()" finds $needle at index 0.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for. An empty needle always yields true.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // with an empty operand the answer depends only on whether the needle is empty
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        $first_index = self::stripos($haystack, $needle);

        return $first_index === 0;
    }
6892
6893
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive (each entry is checked with "UTF8::str_istarts_with()")
     * - an empty $str or an empty $substrings list yields false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // iterate by value: nothing is written back, so a reference is not needed
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        // also reached for an empty list
        return false;
    }
6924
6925
    /**
     * Gets the substring after the first occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset has to be computed in the same charset that is used to cut the string below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6964
6965
    /**
     * Gets the substring after the last occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset has to be computed in the same charset that is used to cut the string below
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7004
7005
    /**
     * Gets the substring before the first occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset has to be computed in the same charset that is used to cut the string below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7036
7037
    /**
     * Gets the substring before the last occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // exact 'UTF-8' uses mbstring directly, anything else the helpers of this class
        $is_utf8 = $encoding === 'UTF-8';

        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return $is_utf8
            ? (string) \mb_substr($str, 0, $offset)
            : (string) self::substr($str, 0, $offset, $encoding);
    }
7073
7074
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Thin wrapper around "UTF8::stristr()" that returns an empty string
     * instead of false, and also for an empty $str or $needle.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        // "false" means: needle not found
        return $part === false ? '' : $part;
    }
7112
7113
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Thin wrapper around "UTF8::strrichr()" that returns an empty string
     * instead of false, and also for an empty $str or $needle.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        // "false" means: needle not found
        return $part === false ? '' : $part;
    }
7151
7152
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned for an empty $str or when $n is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // any other charset goes through the encoding-aware helper of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // a negative start counts from the end of the string
        $tail = \mb_substr($str, -$n);

        return (string) $tail;
    }
7180
7181
    /**
     * Limit the number of characters in a string.
     *
     * A string that is not longer than $length is returned unchanged.
     * Otherwise the string is cut so that the kept part plus $str_add_on is
     * $length characters long. When $str_add_on alone is already as long as
     * (or longer than) $length, nothing of $str is kept and only $str_add_on
     * is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // never pass a negative length to "mb_substr()": that would cut from
            // the end of the string instead of limiting it
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on in the same charset as the string itself
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7220
7221
    /**
     * Limit the number of characters in a string, but cut at a word boundary.
     *
     * The string is shortened to at most $length characters. If the cut would
     * split a word, everything after the last space inside the limit is dropped.
     * If there is no space inside the limit, the string is hard-cut one character
     * before the limit.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // nothing to do if the string already fits
        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($char_count <= $length) {
            return $str;
        }

        // the limit falls exactly behind a word: drop the space and stop here
        $char_at_limit = $is_utf8
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);
        if ($char_at_limit === ' ') {
            $head = $is_utf8
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        $cut = $is_utf8
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if (!$is_utf8 && $cut === false) {
            return '' . $str_add_on;
        }

        // everything in front of the last space, i.e. without the (partial) last word
        $without_last_word = \implode(' ', \explode(' ', $cut, -1));
        if ($without_last_word !== '') {
            return $without_last_word . $str_add_on;
        }

        // no usable word boundary inside the limit: hard cut
        $fallback = $is_utf8
            ? \mb_substr($cut, 0, $length - 1)
            : self::substr($cut, 0, $length - 1, $encoding);

        return ((string) $fallback) . $str_add_on;
    }
7289
7290
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared prefix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // the prefix can never be longer than the shorter string
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            for ($pos = 0; $pos < $limit; ++$pos) {
                $current = \mb_substr($str1, $pos, 1);

                if ($current === false || $current !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $current;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $current = self::substr($str1, $pos, 1, $encoding);

            if ($current === false || $current !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $current;
        }

        return $prefix;
    }
7353
7354
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest substring of $str1 that also occurs in $str2,
     *                or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // The encoding may have been normalized to "UTF-8" above, so check it again.
        $use_mb = $encoding === 'UTF-8';

        // Extract every character only once instead of once per table cell.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[$i] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // (exclusive) end position of that match in $str1

        // A cell only depends on the previous row, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $current_row[$j] = $previous_row[$j - 1] + 1;

                    // strictly greater: on ties the first match wins
                    if ($current_row[$j] > $len) {
                        $len = $current_row[$j];
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7444
7445
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // the suffix can never be longer than the shorter string
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // walk backwards: a negative start counts from the end of the string
            for ($distance = 1; $distance <= $limit; ++$distance) {
                $current = \mb_substr($str1, -$distance, 1);

                if ($current === false || $current !== \mb_substr($str2, -$distance, 1)) {
                    break;
                }

                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($distance = 1; $distance <= $limit; ++$distance) {
            $current = self::substr($str1, -$distance, 1, $encoding);

            if ($current === false || $current !== self::substr($str2, -$distance, 1, $encoding)) {
                break;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }
7511
7512
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (UTF-8) modifier, so it must be passed without delimiters and a literal
     * "/" inside the pattern has to be escaped by the caller.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return (bool) \preg_match($regex, $str);
    }
7527
7528
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // negative offsets are valid down to -$char_count, positive ones up to $char_count - 1
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
7553
7554
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the same encoding that is used to fetch the
        // character below, otherwise the bounds check and the lookup can disagree.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7586
7587
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of the supported strings
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string; the input is returned unchanged if it is
     *                already longer than $pad_length, or if $pad_length is 0 or $pad_string is empty.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // "UTF-8" uses the mbstring functions directly, everything else goes through the wrappers
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        // number of characters that have to be added
        $diff = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd remainder goes to the right side
            $left_size = (int) \floor($diff / 2);
            $right_size = (int) \ceil($diff / 2);

            $left_fill = \str_repeat($pad_string, $left_size);
            $right_fill = \str_repeat($pad_string, $right_size);

            if ($is_utf8) {
                $pre = (string) \mb_substr($left_fill, 0, $left_size);
                $post = (string) \mb_substr($right_fill, 0, $right_size);
            } else {
                $pre = (string) self::substr($left_fill, 0, $left_size, $encoding);
                $post = (string) self::substr($right_fill, 0, $right_size, $encoding);
            }

            return $pre . $str . $post;
        }

        // one-sided padding: repeat the pad string often enough, then trim to the exact size
        $ps_length = $is_utf8
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);

        $fill = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));

        $padding = $is_utf8
            ? (string) \mb_substr($fill, 0, $diff)
            : (string) self::substr($fill, 0, $diff, $encoding);

        if ($pad_type === \STR_PAD_LEFT) {
            return $padding . $str;
        }

        // STR_PAD_RIGHT and every unknown integer value
        return $str . $padding;
    }
7754
7755
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // thin convenience wrapper, all the work happens in str_pad()
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7783
7784
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // thin convenience wrapper, all the work happens in str_pad()
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7812
7813
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // thin convenience wrapper, all the work happens in str_pad()
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7841
7842
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7871
7872
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // plain pass-through to the native function, $count is filled by reference
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7931
7932
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is done byte-wise. With an empty $search the replacement
     * is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // an empty needle never "matches": the replacement is simply appended
        // (this also covers the cases with an empty input string)
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_bytes = \strlen($search);

        // leave the input untouched unless it starts with the search term
        if (\strncmp($str, $search, $prefix_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_bytes);
    }
7970
7971
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is done byte-wise. With an empty $search the replacement
     * is appended to $str. A $search that is longer than $str can never match,
     * so $str is returned unchanged in that case.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // an empty needle never "matches": the replacement is simply appended
        // (this also covers the cases with an empty input string)
        if ($search === '') {
            return $str . $replacement;
        }

        $strLength = \strlen($str);
        $searchLength = \strlen($search);

        // Guard against needles that are longer than the input: looking them up via
        // strpos() with a negative offset outside of the string fails (a warning on
        // PHP 7, a ValueError on PHP 8).
        if ($searchLength > $strLength) {
            return $str;
        }

        // compare the tail of the input with the needle
        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, $strLength - $searchLength) . $replacement;
        }

        return $str;
    }
8008
8009
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The subject with the first occurrence replaced, or the unchanged
     *                subject if the search term was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strpos($subject, $search);

        // nothing found, nothing to replace
        if ($position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
    }
8043
8044
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The subject with the last occurrence replaced, or the unchanged
     *                subject if the search term was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strrpos($subject, $search);

        // nothing found, nothing to replace
        if ($position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
    }
8077
8078
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Nothing to shuffle; this also avoids range(0, -1), which would yield [0, -1].
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $indexes = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($indexes);

            // init
            $shuffled_str = '';

            // iterate by value: the indexes are only read
            foreach ($indexes as $i) {
                $tmp_sub_str = \mb_substr($str, $i, 1);
                if ($tmp_sub_str !== false) {
                    $shuffled_str .= $tmp_sub_str;
                }
            }

            return $shuffled_str;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $indexes = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // init
        $shuffled_str = '';

        foreach ($indexes as $i) {
            $tmp_sub_str = self::substr($str, $i, 1, $encoding);
            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
8127
8128
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start or $end are counted
     * from the end of the string.
     *
     * An empty string is returned when the (resolved) $end does not lie
     * behind the (resolved) $start.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring, as returned by mb_substr() / self::substr().</p>
     *                      <p>NOTE(review): whether <b>FALSE</b> is still produced when <i>start</i> lies
     *                      beyond the string depends on those helpers / the PHP version - verify.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end === null) {
            // no end index: a length as long as the whole string always reaches the end
            $length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        } else {
            if ($start < 0 || $end < 0) {
                // resolve "counted from the end" indexes to absolute positions first,
                // otherwise a negative length would be handed to the substring helper,
                // which interprets it as "omit N characters from the end"
                $str_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);

                if ($start < 0) {
                    $start = \max(0, $start + $str_length);
                }

                if ($end < 0) {
                    $end += $str_length;
                }
            }

            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
8179
8180
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every upper-case letter is lower-cased
     * and prefixed with "_", ASCII digits are wrapped in "_", and repeated or
     * surrounding "_" are removed afterwards.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers + upper-case letters; no "|" here: inside a character class it
            // would be a literal pipe and not an alternation
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];

                // an ASCII digit survives the int round trip and gets "_" on both sides
                if ((string) (int) $match === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // a separate "trim whitespace" pattern is not needed here: once every
        // whitespace run is turned into "_", nothing is left for it to match
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            '_',
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8249
8250
    /**
     * Sort the characters of a string by their code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping values <-> keys twice drops repeated code points
            $flipped = \array_flip($code_points);
            $code_points = \array_flip($flipped);
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8280
8281
    /**
     * Split every entry of an array into an array of Unicode characters (or chunks).
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input, keyed like the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // array_map() with a single array keeps the original keys
        /** @var string[][] $chunks */
        $chunks = \array_map(
            static function ($entry) use ($length, $clean_utf8, $try_to_use_mb_functions) {
                return self::str_split(
                    $entry,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            },
            $input
        );

        return $chunks;
    }
8319
8320
    /**
     * Split a string into an array of its unicode characters (or chunks of characters).
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input; empty for an empty
     *                  input or a $length below 1.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // old fallback: array input is handed over to the array variant
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            // preferred way: the native splitter already honours "$length"
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native_chunks = \mb_str_split($input, $length);
                if ($native_chunks !== false) {
                    return $native_chunks;
                }
            }

            $char_count = \mb_strlen($input);
            if ($char_count > 127) {
                // longer input: one regex pass instead of one mb_substr() call per character
                $found = [];
                \preg_match_all('/./us', $input, $found);
                $chars = $found[0] ?? [];
            } else {
                $chars = [];
                for ($pos = 0; $pos < $char_count; ++$pos) {
                    $chars[] = \mb_substr($input, $pos, 1);
                }
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $found = [];
            \preg_match_all('/./us', $input, $found);
            $chars = $found[0] ?? [];
        } else {
            // fallback: walk the bytes and group them by the UTF-8 lead byte

            $chars = [];
            $byte_count = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($pos = 0; $pos < $byte_count; ++$pos) {
                $lead = $input[$pos];

                // the lead byte announces how many bytes belong to the character
                if (($lead & "\x80") === "\x00") {
                    $sequence_length = 1;
                } elseif (($lead & "\xE0") === "\xC0") {
                    $sequence_length = 2;
                } elseif (($lead & "\xF0") === "\xE0") {
                    $sequence_length = 3;
                } elseif (($lead & "\xF8") === "\xF0") {
                    $sequence_length = 4;
                } else {
                    // not a valid lead byte: the byte is dropped
                    continue;
                }

                // truncated sequence at the end of the input: the byte is dropped
                if (!isset($input[$pos + $sequence_length - 1])) {
                    continue;
                }

                // every following byte must be a continuation byte (10xxxxxx)
                $is_complete = true;
                for ($offset = 1; $offset < $sequence_length; ++$offset) {
                    if (($input[$pos + $offset] & "\xC0") !== "\x80") {
                        $is_complete = false;

                        break;
                    }
                }

                if ($is_complete) {
                    $chars[] = \substr($input, $pos, $sequence_length);

                    // skip the continuation bytes that were just consumed
                    $pos += $sequence_length - 1;
                }
            }
        }

        if ($length > 1) {
            // glue the single characters together into chunks of "$length"
            $chunks = [];
            foreach (\array_chunk($chars, $length) as $chunk) {
                $chunks[] = \implode('', $chunk);
            }

            return $chunks;
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
8478
8479
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; empty when $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports false on failure; map it to an empty result,
            // exactly like the preg_split() branch below does
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first "$limit" parts
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // preg_split() puts the unsplit remainder into the last element when a limit
        // is given, so one extra element is requested and dropped again afterwards
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): the pattern is escaped via preg_quote() here, i.e. matched
        // literally, while the mb_split() branch treats it as a regex - confirm intent
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
8543
8544
    /**
     * Tell whether "$haystack" begins with "$needle" (case-sensitive, byte-wise comparison).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Always true for an empty needle; false for an empty haystack with a
     *              non-empty needle.</p>
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // the empty needle is checked first, so ('', '') is reported as a match
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // older runtimes: compare the leading bytes by hand
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8576
8577
    /**
     * Tell whether the string begins with at least one of the given substrings.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the substrings; false for an empty
     *              $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // the first matching candidate settles the answer
        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
8608
8609
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // everything behind the separator
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper (like the sibling "*_separator" methods) instead of
        // calling mb_substr() directly with the raw encoding name
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8650
8651
    /**
     * Return the part of the string that follows the last occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $last_offset = self::strrpos($str, $separator, 0, $encoding);
            if ($last_offset === false) {
                return '';
            }

            $tail_start = $last_offset + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $tail_start, null, $encoding);
        }

        // UTF-8: the native mbstring functions are used directly
        $last_offset = \mb_strrpos($str, $separator);
        if ($last_offset === false) {
            return '';
        }

        $tail_start = $last_offset + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $tail_start);
    }
8695
8696
    /**
     * Return the part of the string that precedes the first occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $first_offset = self::strpos($str, $separator, 0, $encoding);
            if ($first_offset === false) {
                return '';
            }

            return (string) self::substr($str, 0, $first_offset, $encoding);
        }

        // UTF-8: the native mbstring functions are used directly
        $first_offset = \mb_strpos($str, $separator);
        if ($first_offset === false) {
            return '';
        }

        // the offset of the separator is the length of the leading part
        return (string) \mb_substr($str, 0, $first_offset);
    }
8741
8742
    /**
     * Return the part of the string that precedes the last occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // the lookup runs with the encoding as given ...
            $last_offset = self::strrpos($str, $separator, 0, $encoding);
            if ($last_offset === false) {
                return '';
            }

            // ... and only the extraction uses the normalized encoding name
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $last_offset, $normalized_encoding);
        }

        // UTF-8: the native mbstring functions are used directly
        $last_offset = \mb_strrpos($str, $separator);
        if ($last_offset === false) {
            return '';
        }

        // the offset of the separator is the length of the leading part
        return (string) \mb_substr($str, 0, $last_offset);
    }
8786
8787
    /**
     * Return the part of the string starting at the first occurrence of "$needle"
     * (needle included), or - with "$before_needle" - the part in front of it.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strstr() defaults to "before_needle = false", so the flag can be passed through as is
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8833
8834
    /**
     * Return the part of the string starting at the last occurrence of "$needle"
     * (needle included), or - with "$before_needle" - the part in front of it.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strrchr() defaults to "before_needle = false", so the flag can be passed through as is
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8880
8881
    /**
     * Wrap $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input with the substring put in front of it and behind it.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8896
8897
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           word separators, in addition to whitespace.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are enough as long as no language-specific
        // or length-preserving case mapping was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare explicitly: a plain truthiness check would ignore the separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is every run of chars that are neither whitespace nor separator chars
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                $word = $match[0];

                // ignored words are kept exactly as written
                if ($ignore !== null && \in_array($word, $ignore, true)) {
                    return $word;
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($word, 0, 1))
                               . \mb_strtolower(\mb_substr($word, 1));
                    }

                    return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $word,
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8994
8995
    /**
8996
     * Convert a string into an obfuscated string.
8997
     *
8998
     * EXAMPLE: <code>
8999
     *
9000
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
9001
     * </code>
9002
     *
9003
     * @param string   $str
9004
     * @param float    $percent
9005
     * @param string   $obfuscateChar
9006
     * @param string[] $keepChars
9007
     *
9008
     * @psalm-pure
9009
     *
9010
     * @return string
9011
     *                <p>The obfuscated string.</p>
9012
     */
9013 1
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Randomly picks round(count * $percent) chars and replaces them with
        // $obfuscateChar. Picked chars that are listed in $keepChars still count
        // as "picked" but are left untouched.

        // Occurrences of $obfuscateChar that are already part of the input are
        // parked behind a placeholder and restored at the very end.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Never ask for more picks than there are chars: with $percent > 1 the
        // loop below could otherwise never reach its target and would spin forever.
        $charsMaxChange = \min($charsMax, \round($charsMax * $percent));
        $charsCounter = 0;
        $charKeyDone = [];

        while ($charsCounter < $charsMaxChange) {
            // "foreach" walks a snapshot of $chars, so $char is always the original char
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // roughly a coin flip per char and per pass
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                // NOTE(review): input occurrences of $obfuscateChar were replaced by the
                // placeholder above, so this guard looks unreachable - confirm the intent.
                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
9061
9062
    /**
9063
     * Returns a trimmed string in proper title case.
9064
     *
9065
     * Also accepts an array, $ignore, allowing you to list words not to be
9066
     * capitalized.
9067
     *
9068
     * Adapted from John Gruber's script.
9069
     *
9070
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
9071
     *
9072
     * @param string $str
9073
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
9074
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9075
     *
9076
     * @psalm-pure
9077
     *
9078
     * @return string
9079
     *                <p>The titleized string.</p>
9080
     */
9081 35
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments for the words that stay lower-case inside a title
        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        // The caller's words are plain text, not regex fragments. Without quoting,
        // a word such as "(tm" makes every pattern below invalid, preg_replace_callback()
        // returns null and the whole title collapses to ''.
        foreach ($ignore as $ignore_word) {
            $ignore_word = (string) $ignore_word;
            if ($ignore_word === '') {
                // an empty alternative would match everywhere
                continue;
            }

            // quote meta chars (incl. the "~" delimiter); whitespace must be escaped
            // as well, because all patterns are compiled in extended ("x") mode
            $small_words[] = (string) \preg_replace(
                '/[ \t\r\n\f\x0B]/',
                '\\\\$0',
                \preg_quote($ignore_word, '~')
            );
        }

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // input without any lower-case char (e.g. "ALL CAPS") is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for "…" (ellipsis) although its
        // comment talks about a hyphen - kept as-is, confirm the intended character.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
9253
9254
    /**
9255
     * Get a binary representation of a specific string.
9256
     *
9257
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
9258
     *
9259
     * @param string $str <p>The input string.</p>
9260
     *
9261
     * @psalm-pure
9262
     *
9263
     * @return false|string
9264
     *                      <p>false on error</p>
9265
     */
9266 2
    public static function str_to_binary(string $str)
    {
        // Returns the bits of the raw bytes of $str as a string of "0" / "1",
        // without leading zero bits ("0" for an empty or all-zero input).

        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Translate every hex digit into its four bits. base_convert() is not
        // used on purpose: it works on int / float internally and silently loses
        // precision as soon as the input is longer than a few bytes.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            (string) $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // keep the historic output format: no leading zeros, but never an empty string
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9277
9278
    /**
9279
     * @param string   $str
9280
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
9281
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
9282
     *
9283
     * @psalm-pure
9284
     *
9285
     * @return string[]
9286
     */
9287 17
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split": no entry at all, or a single empty line
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // one or two consecutive CR / LF chars are treated as a single line break
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // only hand the lines over to the reducer when some filtering was requested
        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
9318
9319
    /**
9320
     * Convert a string into an array of words.
9321
     *
9322
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
9323
     *
9324
     * @param string   $str
9325
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
9326
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
9327
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
9328
     *
9329
     * @psalm-pure
9330
     *
9331
     * @return string[]
9332
     */
9333 13
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split": no entry at all, or a single empty string
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // character class for "word chars": letters plus the caller's extra chars
        $word_class = self::rxClass($char_list, '\pL');

        // the words are captured as delimiters, so words and the text between
        // them alternate in the result
        $parts = \preg_split("/({$word_class}+(?:[\p{Pd}’']{$word_class}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothing;
        }

        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $parts;
        }

        // make sure that every remaining entry really is a string
        return \array_map(
            static function ($part): string {
                return (string) $part;
            },
            self::reduce_string_array($parts, $remove_empty_values, $remove_short_values)
        );
    }
9370
9371
    /**
9372
     * alias for "UTF8::to_ascii()"
9373
     *
9374
     * @param string $str
9375
     * @param string $unknown
9376
     * @param bool   $strict
9377
     *
9378
     * @psalm-pure
9379
     *
9380
     * @return string
9381
     *
9382
     * @see        UTF8::to_ascii()
9383
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
9384
     */
9385 7
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // deprecated alias: everything is forwarded unchanged to "to_ascii()"
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9392
9393
    /**
9394
     * Truncates the string to a given length. If $substring is provided, and
9395
     * truncating occurs, the string is further truncated so that the substring
9396
     * may be appended without exceeding the desired length.
9397
     *
9398
     * @param string $str
9399
     * @param int    $length    <p>Desired length of the truncated string.</p>
9400
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
9401
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
9402
     *
9403
     * @psalm-pure
9404
     *
9405
     * @return string
9406
     *                <p>A string after truncating.</p>
9407
     */
9408 22
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        // Cuts $str down to $length chars. If $substring is given and a cut is
        // needed, room for it is reserved first and it is appended afterwards.
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough already: nothing to cut, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);

                // A negative length would make mb_substr() cut from the END of the
                // string and return far more than requested, so only the substring
                // is kept (same outcome as in "str_truncate_safe()").
                if ($length < 0) {
                    $length = 0;
                }
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);

            // see above: never let the reserved room turn the length negative
            if ($length < 0) {
                $length = 0;
            }
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9453
9454
    /**
9455
     * Truncates the string to a given length, while ensuring that it does not
9456
     * split words. If $substring is provided, and truncating occurs, the
9457
     * string is further truncated so that the substring may be appended without
9458
     * exceeding the desired length.
9459
     *
9460
     * @param string $str
9461
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
9462
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
9463
     *                                                       Default:
9464
     *                                                       ''</p>
9465
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
9466
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
9467
     *
9468
     * @psalm-pure
9469
     *
9470
     * @return string
9471
     *                <p>A string after truncating.</p>
9472
     */
9473 47
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        // nothing can be kept: only the substring is left
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // the literal 'UTF-8' uses the plain "mb_*" functions, every other
        // encoding is normalized and goes through the class' own helpers
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $str_length) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= $is_utf8
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = $is_utf8
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $space_position = $is_utf8
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);
        if ($space_position === $length) {
            return $truncated . $substring;
        }

        // the last word was truncated: find the last space inside the kept part
        $last_position = $is_utf8
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        $cut_back_to_last_space = $last_position !== false
                                  || ($space_position !== false && !$ignore_do_not_split_words_for_one_word);

        if ($cut_back_to_last_space) {
            // without any space in the kept part this drops the (only) word completely
            $truncated = $is_utf8
                ? (string) \mb_substr($truncated, 0, (int) $last_position)
                : (string) self::substr($truncated, 0, (int) $last_position, $encoding);
        }

        return $truncated . $substring;
    }
9560
9561
    /**
9562
     * Returns a lowercase and trimmed string separated by underscores.
9563
     * Underscores are inserted before uppercase characters (with the exception
9564
     * of the first character of the string), and in place of spaces as well as
9565
     * dashes.
9566
     *
9567
     * @param string $str
9568
     *
9569
     * @psalm-pure
9570
     *
9571
     * @return string
9572
     *                <p>The underscored string.</p>
9573
     */
9574 16
    public static function str_underscored(string $str): string
    {
        // "underscored" is "delimited" with an underscore as the delimiter
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
9578
9579
    /**
9580
     * Returns an UpperCamelCase version of the supplied string. It trims
9581
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
9582
     * and underscores, and removes spaces, dashes, underscores.
9583
     *
9584
     * @param string      $str                           <p>The input string.</p>
9585
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
9586
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
9587
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
9588
     *                                                   tr</p>
9589
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
9590
     *                                                   -> ß</p>
9591
     *
9592
     * @psalm-pure
9593
     *
9594
     * @return string
9595
     *                <p>A string in UpperCamelCase.</p>
9596
     */
9597 13
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // step 1: camelCase - only the encoding is handed over to this step
        $camelized = self::str_camelize($str, $encoding);

        // step 2: upper-case the first char, honoring all remaining options
        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9606
9607
    /**
9608
     * alias for "UTF8::ucfirst()"
9609
     *
9610
     * @param string      $str
9611
     * @param string      $encoding
9612
     * @param bool        $clean_utf8
9613
     * @param string|null $lang
9614
     * @param bool        $try_to_keep_the_string_length
9615
     *
9616
     * @psalm-pure
9617
     *
9618
     * @return string
9619
     *
9620
     * @see        UTF8::ucfirst()
9621
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
9622
     */
9623 5
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // deprecated alias: all arguments are forwarded unchanged to "ucfirst()"
        $upper_first = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $upper_first;
    }
9638
9639
    /**
9640
     * Get the number of words in a specific string.
9641
     *
9642
     * EXAMPLES: <code>
9643
     * // format: 0 -> return only word count (int)
9644
     * //
9645
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
9646
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
9647
     *
9648
     * // format: 1 -> return words (array)
9649
     * //
9650
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
9651
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
9652
     *
9653
     * // format: 2 -> return words with offset (array)
9654
     * //
9655
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
9656
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
9657
     * </code>
9658
     *
9659
     * @param string $str       <p>The input string.</p>
9660
     * @param int    $format    [optional] <p>
9661
     *                          <strong>0</strong> => return a number of words (default)<br>
9662
     *                          <strong>1</strong> => return an array of words<br>
9663
     *                          <strong>2</strong> => return an array of words with word-offset as key
9664
     *                          </p>
9665
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
9666
     *
9667
     * @psalm-pure
9668
     *
9669
     * @return int|string[]
9670
     *                      <p>The number of words in the string.</p>
9671
     */
9672 2
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the split result alternates: [text, word, text, word, ..., text],
        // i.e. the words sit on the odd indexes
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        // every format except 1 and 2: only the number of words
        if ($format !== 1 && $format !== 2) {
            return (int) (($parts_count - 1) / 2);
        }

        $words = [];

        // format 1: plain list of the words
        if ($format === 1) {
            $index = 1;
            while ($index < $parts_count) {
                $words[] = $parts[$index];
                $index += 2;
            }

            return $words;
        }

        // format 2: the words keyed by their char offset inside the input
        $offset = (int) self::strlen($parts[0]);
        $index = 1;
        while ($index < $parts_count) {
            $words[$offset] = $parts[$index];

            // skip the word itself and the text that follows it
            $offset += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
            $index += 2;
        }

        return $words;
    }
9696
9697
    /**
9698
     * Case-insensitive string comparison.
9699
     *
9700
     * INFO: Case-insensitive version of UTF8::strcmp()
9701
     *
9702
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
9703
     *
9704
     * @param string $str1     <p>The first string.</p>
9705
     * @param string $str2     <p>The second string.</p>
9706
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9707
     *
9708
     * @psalm-pure
9709
     *
9710
     * @return int
9711
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9712
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
9713
     *             <strong>0</strong> if they are equal
9714
     */
9715 23
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // both operands are case-folded with exactly the same options ...
        $fold = static function (string $input) use ($encoding): string {
            return self::strtocasefold($input, true, false, $encoding, null, false);
        };

        $folded1 = $fold($str1);
        $folded2 = $fold($str2);

        // ... and then compared with the case-sensitive comparison
        return self::strcmp($folded1, $folded2);
    }
9739
9740
    /**
9741
     * alias for "UTF8::strstr()"
9742
     *
9743
     * @param string $haystack
9744
     * @param string $needle
9745
     * @param bool   $before_needle
9746
     * @param string $encoding
9747
     * @param bool   $clean_utf8
9748
     *
9749
     * @psalm-pure
9750
     *
9751
     * @return false|string
9752
     *
9753
     * @see        UTF8::strstr()
9754
     * @deprecated <p>please use "UTF8::strstr()"</p>
9755
     */
9756 2
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // deprecated alias: all arguments are forwarded unchanged to "strstr()"
        $found = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $found;
    }
9771
9772
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings short-circuit to 0. Otherwise both strings are converted to
     * Unicode normalization form D and compared byte-wise with "\strcmp()", so a
     * precomposed character and its decomposed form compare as equal.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false when it cannot process the input (e.g. malformed UTF-8);
        // handing that to "\strcmp()" is a TypeError under strict_types, so compare the raw bytes instead.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
9798
9799
    /**
     * Find length of initial segment not matching mask.
     *
     * Counts the characters at the start of the examined string that come before the
     * first character contained in $char_list.
     *
     * @param string   $str       <p>The string to examine.</p>
     * @param string   $char_list <p>The characters to stop at. If empty, the length of the whole $str is returned.</p>
     * @param int      $offset    [optional] <p>Start of the examined part; used together with $length to cut a
     *                            sub-string out of $str first.</p>
     * @param int|null $length    [optional] <p>Length of the examined part, <strong>null</strong> means "up to the
     *                            end".</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of leading characters without a match, the full length if nothing from
     *             $char_list occurs, or 0 if the examined part is empty or could not be determined.</p>
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // nothing can stop the scan, so the whole (un-cut) string counts
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        $needs_cut = $offset !== 0 || $length !== null;
        if ($needs_cut) {
            if ($encoding !== 'UTF-8') {
                $segment = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $segment = \mb_substr($str, $offset, $length);
            } else {
                $segment = \mb_substr($str, $offset);
            }

            if ($segment === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $segment;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first character of the mask
        $captured = [];
        $has_match = \preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $captured);
        if (!$has_match) {
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($captured[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9862
9863
    /**
     * Deprecated alias of "UTF8::stristr()": every argument is forwarded unchanged.
     *
     * @param string $haystack      <p>Passed on to UTF8::stristr() as first argument.</p>
     * @param string $needle        <p>Passed on to UTF8::stristr() as second argument.</p>
     * @param bool   $before_needle [optional] <p>Passed on to UTF8::stristr() as third argument.</p>
     * @param string $encoding      [optional] <p>Passed on to UTF8::stristr() as fourth argument.</p>
     * @param bool   $clean_utf8    [optional] <p>Passed on to UTF8::stristr() as fifth argument.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>Exactly what UTF8::stristr() returns for the same arguments.</p>
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
9894
9895
    /**
     * Create a UTF-8 string from code points.
     *
     * Each value is cast with "(int)", written as a numeric HTML entity ("&#<number>;") and the
     * concatenated entities are decoded via UTF8::html_entity_decode().
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>One code point or a list of code points. NOTE: values are
     *                                            only cast with "(int)", no other number format is parsed
     *                                            here.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string, or an empty string for an empty array.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single value is treated like a one-element list
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        $entities = \array_map(
            static function ($code_point): string {
                return '&#' . (int) $code_point . ';';
            },
            $code_points
        );

        return self::html_entity_decode(\implode('', $entities), \ENT_QUOTES | \ENT_HTML5);
    }
9928
9929
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The start of the string is compared byte-wise against every entry of the internal BOM table
     * (key: BOM bytes, value: number of bytes to compare).
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // iterate by value: the table is shared static state that is only read here, a by-reference
        // loop would turn its entries into references and leave a dangling reference behind
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9955
9956
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * Thin wrapper around the native "\strip_tags()", optionally preceded by UTF8::clean().
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped; with <strong>null</strong> the
     *                                    native function is called without this argument.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
10000
10001
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * If the string cannot be processed as UTF-8, the ASCII whitespace bytes
     * (tab, line feed, vertical tab, form feed, carriage return, space) are
     * still removed and all other bytes are kept as they are.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without whitespace.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);
        if ($stripped !== null) {
            return $stripped;
        }

        // preg_replace() returns null on a PCRE error (e.g. malformed UTF-8 combined with the "u" modifier);
        // casting that to string would silently throw the whole input away, so fall back to a byte-wise
        // replacement of the ASCII whitespace characters
        $stripped = \preg_replace('/[\x09-\x0D\x20]+/', '', $str);

        return $stripped === null ? $str : $stripped;
    }
10022
10023
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * The search is delegated to the first available backend: "mb_stripos()", then "grapheme_stripos()"
     * (UTF-8 and non-negative offset only), then the native "stripos()" for pure ASCII input and finally
     * a case-folded UTF8::strpos() call.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // PHP >= 8 finds an empty needle at position 0, older versions report "not found"
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        // checked after the cleaning step on purpose: cleaning can leave an empty needle behind,
        // which must not reach the search functions below on PHP < 8
        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_stripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stripos($haystack, $needle, $offset);
            // a "false" result is not trusted here, the remaining fallbacks get their chance
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // both strings are case-folded the same way, so a case-sensitive search is sufficient
        $haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($haystack, $needle, $offset, $encoding);
    }
10112
10113
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * The search is case-insensitive and is delegated to the first available backend: "mb_stristr()",
     * then "grapheme_stristr()" (UTF-8 only), then the native "stristr()" for pure ASCII input and
     * finally a regular expression.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // only PHP >= 8 treats "empty needle in empty haystack" as a match
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_stristr($haystack, $needle, $before_needle)
                : \mb_stristr($haystack, $needle, $before_needle, self::normalize_encoding($encoding, 'UTF-8'));
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $is_utf8 = $encoding === 'UTF-8';

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // INFO: "grapheme_stristr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $intl_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // last resort: lazily capture everything in front of the first case-insensitive match
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);
        if (!isset($match[1])) {
            return false;
        }

        $head = $match[1];
        if ($before_needle) {
            return $head;
        }

        // the match starts right behind the captured head
        $head_length = (int) self::strlen($head, $encoding);

        return self::substr($haystack, $head_length, null, $encoding);
    }
10218
10219
    /**
     * Get the string length, not the byte-length!
     *
     * The length is taken from the first available backend: "mb_strlen()", then the native "strlen()"
     * for the "CP850" / "ASCII" encodings, then "iconv_strlen()", then "grapheme_strlen()" (UTF-8 only),
     * then the native "strlen()" for pure ASCII input and finally a regular expression.
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $is_utf8 ? @\mb_strlen($str) : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $no_converter_available = self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false;
        if (!$is_utf8 && $no_converter_available) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // one regex match per character; no match at all is reported as failure
        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        return $char_count === 0 ? false : $char_count;
    }
10349
10350
    /**
     * Get string length in byte.
     *
     * Uses the native "strlen()", unless the "mbstring_func_overload" support flag is set: then
     * "mb_strlen()" with the 8-bit "CP850" charset is used instead.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of bytes.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it ...
        $overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloaded
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
10372
10373
    /**
     * Case-insensitive string comparisons using a "natural order" algorithm.
     *
     * Both strings are run through UTF8::strtocasefold() and the results are handed to UTF8::strnatcmp().
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
10404
10405
    /**
     * String comparisons using a "natural order" algorithm
     *
     * Identical strings short-circuit to 0; otherwise both strings are prepared with
     * UTF8::strtonatfold() and compared via the native "strnatcmp()".
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        return $str1 === $str2
            ? 0
            : \strnatcmp(
                (string) self::strtonatfold($str1),
                (string) self::strtonatfold($str2)
            );
    }
10441
10442
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are run through UTF8::strtocasefold() and the results are handed to UTF8::strncmp(),
     * using the same $encoding for both steps.
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            // forward the charset, otherwise the strings would always be cut as UTF-8
            $encoding
        );
    }
10475
10476
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut down to their first $len characters and the results are
     * compared via UTF8::strcmp().
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut both operands the same way: "mb_substr()" for UTF-8, UTF8::substr() for everything else
        $heads = [];
        foreach ([$str1, $str2] as $operand) {
            $heads[] = $encoding === 'UTF-8'
                ? (string) \mb_substr($operand, 0, $len)
                : (string) self::substr($operand, 0, $len, $encoding);
        }

        return self::strcmp($heads[0], $heads[1]);
    }
10517
10518
    /**
     * Search a string for any of a set of characters.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found
     *                      (also if one of the arguments is an empty string).</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $found = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m);
        if (!$found) {
            return false;
        }

        // locate the matched text byte-wise and return everything from there on
        $byte_offset = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $byte_offset);
    }
10545
10546
    /**
10547
     * Find the position of the first occurrence of a substring in a string.
10548
     *
10549
     * INFO: use UTF8::strpos_in_byte() for the byte-length
10550
     *
10551
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
10552
     *
10553
     * @see http://php.net/manual/en/function.mb-strpos.php
10554
     *
10555
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10556
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
10557
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
10558
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10559
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10560
     *
10561
     * @psalm-pure
10562
     *
10563
     * @return false|int
10564
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
10565
     *                   string.<br> If needle is not found it returns false.
10566
     */
10567 52
    public static function strpos(
10568
        string $haystack,
10569
        $needle,
10570
        int $offset = 0,
10571
        string $encoding = 'UTF-8',
10572
        bool $clean_utf8 = false
10573
    ) {
10574 52
        if ($haystack === '') {
10575 4
            if (\PHP_VERSION_ID >= 80000) {
10576
                if ($needle === '') {
10577
                    return 0;
10578
                }
10579
            } else {
10580 4
                return false;
10581
            }
10582
        }
10583
10584
        // iconv and mbstring do not support integer $needle
10585 51
        if ((int) $needle === $needle) {
10586
            $needle = (string) self::chr($needle);
10587
        }
10588 51
        $needle = (string) $needle;
10589
10590 51
        if ($haystack === '') {
10591
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
10592
                return 0;
10593
            }
10594
10595
            return false;
10596
        }
10597
10598 51
        if ($needle === '' && \PHP_VERSION_ID < 80000) {
10599 2
            return false;
10600
        }
10601
10602 51
        if ($clean_utf8) {
10603
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
10604
            // if invalid characters are found in $haystack before $needle
10605 3
            $needle = self::clean($needle);
10606 3
            $haystack = self::clean($haystack);
10607
        }
10608
10609 51
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10610 10
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10611
        }
10612
10613
        //
10614
        // fallback via mbstring
10615
        //
10616
10617 51
        if (self::$SUPPORT['mbstring'] === true) {
10618 49
            if ($encoding === 'UTF-8') {
10619
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
10620 49
                return @\mb_strpos($haystack, $needle, $offset);
10621
            }
10622
10623
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
10624 2
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
10625
        }
10626
10627
        //
10628
        // fallback for binary || ascii only
10629
        //
10630
        if (
10631 4
            $encoding === 'CP850'
10632
            ||
10633 4
            $encoding === 'ASCII'
10634
        ) {
10635 2
            return \strpos($haystack, $needle, $offset);
10636
        }
10637
10638
        if (
10639 4
            $encoding !== 'UTF-8'
10640
            &&
10641 4
            self::$SUPPORT['iconv'] === false
10642
            &&
10643 4
            self::$SUPPORT['mbstring'] === false
10644
        ) {
10645
            /**
10646
             * @psalm-suppress ImpureFunctionCall - it is only a warning
10647
             */
10648 2
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10649
        }
10650
10651
        //
10652
        // fallback via intl
10653
        //
10654
10655
        if (
10656 4
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
10657
            &&
10658 4
            $offset >= 0 // grapheme_strpos() can't handle negative offset
10659
            &&
10660 4
            self::$SUPPORT['intl'] === true
10661
        ) {
10662
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
10663
            if ($return_tmp !== false) {
10664
                return $return_tmp;
10665
            }
10666
        }
10667
10668
        //
10669
        // fallback via iconv
10670
        //
10671
10672
        if (
10673 4
            $offset >= 0 // iconv_strpos() can't handle negative offset
10674
            &&
10675 4
            self::$SUPPORT['iconv'] === true
10676
        ) {
10677
            // ignore invalid negative offset to keep compatibility
10678
            // with php < 5.5.35, < 5.6.21, < 7.0.6
10679
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
10680
            if ($return_tmp !== false) {
10681
                return $return_tmp;
10682
            }
10683
        }
10684
10685
        //
10686
        // fallback for ascii only
10687
        //
10688
10689 4
        if (ASCII::is_ascii($haystack . $needle)) {
10690
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
10691 2
            return @\strpos($haystack, $needle, $offset);
10692
        }
10693
10694
        //
10695
        // fallback via vanilla php
10696
        //
10697
10698 4
        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
10699 4
        if ($haystack_tmp === false) {
10700
            $haystack_tmp = '';
10701
        }
10702 4
        $haystack = (string) $haystack_tmp;
10703
10704 4
        if ($offset < 0) {
10705
            $offset = 0;
10706
        }
10707
10708 4
        $pos = \strpos($haystack, $needle);
10709 4
        if ($pos === false) {
10710 3
            return false;
10711
        }
10712
10713 4
        if ($pos) {
10714 4
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
10715
        }
10716
10717 2
        return $offset + 0;
10718
    }
10719
10720
    /**
     * Byte-wise search for the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string to search in.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so it is called with an 8-bit charset
        $use_mb_variant = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_variant
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10752
10753
    /**
     * Byte-wise, case-insensitive search for the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string to search in.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so it is called with an 8-bit charset
        $use_mb_variant = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_variant
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10785
10786
    /**
     * Find the last occurrence of a string within another and return the matching part of the haystack.
     *
     * The whole needle is searched for (like "mb_strrchr()"), no matter which
     * extension ends up doing the work.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found
     *                      (also false if haystack or needle is an empty string).</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrchr($haystack, $needle, $before_needle);
            }

            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            !$before_needle
            &&
            !isset($needle[1]) // native "strrchr()" only honours the first byte of the needle
            &&
            (
                $encoding === 'CP850'
                ||
                $encoding === 'ASCII'
            )
        ) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //
        // INFO: the complete needle is used for the lookup, so that the result
        //       is the same as the one from "mb_strrchr()"
        //

        if (self::$SUPPORT['iconv'] === true) {
            $pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($pos === false) {
            return false;
        }

        if ($before_needle) {
            return self::substr($haystack, 0, $pos, $encoding);
        }

        return self::substr($haystack, $pos, null, $encoding);
    }
10916
10917
    /**
     * Reverses characters order in the string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence
     *                (an empty string for empty input).</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // the string is passed through emoji_encode() before and emoji_decode() after the
        // reversal - NOTE(review): presumably so that emoji are not torn apart, confirm
        $str = self::emoji_encode($str, true);

        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk from the last character to the first one
            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10974
10975
    /**
     * Find the last occurrence of a string within another, case-insensitive,
     * and return the matching part of the haystack.
     *
     * The whole needle is searched for (like "mb_strrichr()"), no matter which
     * extension ends up doing the work.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found
     *                      (also false if haystack or needle is an empty string).</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrichr($haystack, $needle, $before_needle);
            }

            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //
        // INFO: the complete needle is used for the lookup, so that the result
        //       is the same as the one from "mb_strrichr()"
        //

        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        if ($before_needle) {
            return self::substr($haystack, 0, $pos, $encoding);
        }

        return self::substr($haystack, $pos, null, $encoding);
    }
11055
11056
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: php < 8 never matches, php >= 8 matches an empty needle at position 0
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // same check again, now that the needle is a string for sure
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strripos($haystack, $needle, $offset);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then do a case-sensitive search
        //

        $haystack_folded = self::strtocasefold($haystack, true, false, $encoding);
        $needle_folded = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($haystack_folded, $needle_folded, $offset, $encoding, $clean_utf8);
    }
11187
11188
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if
     *                   haystack or needle is an empty string.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so it is called with an 8-bit charset
        $use_mb_variant = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_variant
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
11222
11223
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: php < 8 never matches, php >= 8 matches an empty needle at position 0
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // same check again, now that the needle is a string for sure
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //
        // INFO: the haystack is cut down according to $offset, searched byte-wise and the
        //       byte position is converted back into a character position; $encoding is
        //       passed to substr() / strlen() like in UTF8::strpos()
        //

        $haystack_tmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // cut $offset characters from the end, the result is counted from position 0
            $haystack_tmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystack_tmp !== null) {
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // number of characters before the match + the characters skipped via $offset
        return $offset + (int) self::strlen($str_tmp, $encoding);
    }
11382
11383
    /**
     * Byte-wise search for the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so it is called with an 8-bit charset
        $use_mb_variant = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_variant
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
11417
11418
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start position of the part of the input that is checked.</p>
     * @param int|null $length   [optional] <p>Length of the part of the input that is checked.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment,<br>0 if the (sliced) input or the mask is
     *                   empty or if the input does not start with a char from the mask.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut the input down to the requested part first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest run of mask chars that is anchored at the beginning of the input
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11465
11466
    /**
     * Returns the part of the haystack that starts at (or, optionally, ends before) the first occurrence of the
     * needle.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first
     *                              occurrence of the needle is returned (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // On PHP >= 8 an empty needle yields '' here; otherwise false is returned.
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            // On PHP >= 8 an empty needle yields the whole haystack; otherwise false is returned.
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl ("grapheme_strstr()" can't handle other encodings than UTF-8)
        //

        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $via_intl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($via_intl !== false) {
                return $via_intl;
            }
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5) vanilla php: lazily capture everything in front of the quoted needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $hit);

        if (!isset($hit[1])) {
            return false;
        }

        return $before_needle
            ? $hit[1]
            : self::substr($haystack, (int) self::strlen($hit[1]));
    }
11599
11600
    /**
     * Finds the first occurrence of a string within another, using the "_in_byte" variant of strstr.
     *
     * @param string $haystack      <p>
     *                              The string that is searched for the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the returned portion of haystack:
     *                              true returns everything from the beginning up to the
     *                              first occurrence of needle, false returns everything
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found or one of the inputs is empty.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // Empty operands are always reported as "not found".
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // No function overloading active: call the native function directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with a
        // single-byte charset.
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
    }
11641
11642
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The folded string; an empty input yields an empty string.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        // Apply the special-case replacements first, then do the regular case conversion.
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // A language or a non-UTF-8 charset needs the class' own (slower) converters.
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $lower
                ? self::strtolower($str, $encoding, false, $lang)
                : self::strtoupper($str, $encoding, false, $lang);
        }

        return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
    }
11698
11699
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // load the list of known transliterator ids only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language independent transliterator
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11780
11781
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the list of known transliterator ids only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available in this branch, the language itself is unknown,
                    // so report that (same wording as in UTF8::strtolower())
                    /**
                     * @psalm-suppress ImpureFunctionCall - is is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language independent transliterator
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11862
11863
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to to.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException <p>If the "from" and "to" lists cannot be combined into a replacement map.</p>
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical "from" and "to" cannot change anything
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // strings are translated character by character, so split them into lists
            $from_list = \is_array($from) ? $from : self::str_split($from);
            $to_list = \is_array($to) ? $to : self::str_split($to);

            // cut the longer list down, so that both lists have the same size
            $count_from = \count($from_list);
            $count_to = \count($to_list);
            if ($count_from > $count_to) {
                $from_list = \array_slice($from_list, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_list = \array_slice($to_list, 0, $count_from);
            }

            // keep the result in its own variable, so that the lists are still
            // available for the error message if the combination fails
            $pairs = \array_combine($from_list, $to_list);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_list, true) . ' | to: ' . \print_r($to_list, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        // from here on "$to" is always the empty string
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        // "$from" is expected to be a replacement map (search => replace)
        return \strtr($str, $from);
    }
11930
11931
    /**
     * Return the width of a string.
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The width; in the fallback without mbstring every character from the wide
     *             ranges counts twice, every other character once.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
        }

        //
        // 2) vanilla php: the pattern below is written for UTF-8 input
        //

        if (!$is_utf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // Remove every wide character and let preg_replace() count the removals.
        $wide_count = 0;
        $narrow_part = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // wide characters take two columns, everything that is left takes one
        return ($wide_count * 2) + (int) self::strlen($narrow_part);
    }
11990
11991
    /**
11992
     * Get part of a string.
11993
     *
11994
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
11995
     *
11996
     * @see http://php.net/manual/en/function.mb-substr.php
11997
     *
11998
     * @param string   $str        <p>The string being checked.</p>
11999
     * @param int      $offset     <p>The first position used in str.</p>
12000
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
12001
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12002
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12003
     *
12004
     * @psalm-pure
12005
     *
12006
     * @return false|string
12007
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
12008
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
12009
     *                      characters long, <b>FALSE</b> will be returned.
12010
     */
12011 172
    public static function substr(
12012
        string $str,
12013
        int $offset = 0,
12014
        int $length = null,
12015
        string $encoding = 'UTF-8',
12016
        bool $clean_utf8 = false
12017
    ) {
12018
        // empty string
12019 172
        if ($str === '' || $length === 0) {
12020 8
            return '';
12021
        }
12022
12023 168
        if ($clean_utf8) {
12024
            // iconv and mbstring are not tolerant to invalid encoding
12025
            // further, their behaviour is inconsistent with that of PHP's substr
12026 2
            $str = self::clean($str);
12027
        }
12028
12029
        // whole string
12030 168
        if (!$offset && $length === null) {
12031 7
            return $str;
12032
        }
12033
12034 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
12035 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
12036
        }
12037
12038
        //
12039
        // fallback via mbstring
12040
        //
12041
12042 163
        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
12043 161
            if ($length === null) {
12044 64
                return \mb_substr($str, $offset);
12045
            }
12046
12047 102
            return \mb_substr($str, $offset, $length);
12048
        }
12049
12050
        //
12051
        // fallback for binary || ascii only
12052
        //
12053
12054
        if (
12055 4
            $encoding === 'CP850'
12056
            ||
12057 4
            $encoding === 'ASCII'
12058
        ) {
12059
            if ($length === null) {
12060
                return \substr($str, $offset);
12061
            }
12062
12063
            return \substr($str, $offset, $length);
12064
        }
12065
12066
        // otherwise we need the string-length
12067 4
        $str_length = 0;
12068 4
        if ($offset || $length === null) {
12069 4
            $str_length = self::strlen($str, $encoding);
12070
        }
12071
12072
        // e.g.: invalid chars + mbstring not installed
12073 4
        if ($str_length === false) {
12074
            return false;
12075
        }
12076
12077
        // empty string
12078 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
12079
            return '';
12080
        }
12081
12082
        // impossible
12083 4
        if ($offset && $offset > $str_length) {
12084
            return '';
12085
        }
12086
12087 4
        $length = $length ?? (int) $str_length;
12088
12089
        if (
12090 4
            $encoding !== 'UTF-8'
12091
            &&
12092 4
            self::$SUPPORT['mbstring'] === false
12093
        ) {
12094
            /**
12095
             * @psalm-suppress ImpureFunctionCall - is is only a warning
12096
             */
12097 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
12098
        }
12099
12100
        //
12101
        // fallback via intl
12102
        //
12103
12104
        if (
12105 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
12106
            &&
12107 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
12108
            &&
12109 4
            self::$SUPPORT['intl'] === true
12110
        ) {
12111
            $return_tmp = \grapheme_substr($str, $offset, $length);
12112
            if ($return_tmp !== false) {
12113
                return $return_tmp;
12114
            }
12115
        }
12116
12117
        //
12118
        // fallback via iconv
12119
        //
12120
12121
        if (
12122 4
            $length >= 0 // "iconv_substr()" can't handle negative length
12123
            &&
12124 4
            self::$SUPPORT['iconv'] === true
12125
        ) {
12126
            $return_tmp = \iconv_substr($str, $offset, $length);
12127
            if ($return_tmp !== false) {
12128
                return $return_tmp;
12129
            }
12130
        }
12131
12132
        //
12133
        // fallback for ascii only
12134
        //
12135
12136 4
        if (ASCII::is_ascii($str)) {
12137
            return \substr($str, $offset, $length);
12138
        }
12139
12140
        //
12141
        // fallback via vanilla php
12142
        //
12143
12144
        // split to array, and remove invalid characters
12145
        // &&
12146
        // extract relevant part, and join to make sting again
12147 4
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
12148
    }
12149
12150
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * When an offset or a length is given, $str1 is first reduced to the requested slice and
     * $str2 is then truncated to the (character) length of that slice before both are compared.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. With null, the slice of $str1
     *                                     runs from the offset to the end of the string.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // The slice length must be measured in the same encoding that is used to cut $str2,
                // otherwise multi-byte encodings other than UTF-8 yield a wrong character count.
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12211
12212
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative value is handed to "mb_substr()" and therefore
     *                             counts from the end of the haystack.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if $needle is empty or the length of
     *                   $haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in, or an explicitly empty search window
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Reduce the haystack to the requested window. A negative length has to be honoured
        // for a zero offset as well, so the check is against null and not against "> 0".
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the non-overlapping literal matches
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12309
12310
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string (0 if $haystack or $needle is empty).</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1;
                // PHP_VERSION_ID is major * 10000 + minor * 100 + release, so 7.1.0 is 70100
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
12391
12392
    /**
     * Counts how often $substring occurs in $str.
     *
     * The search is case-sensitive unless $case_sensitive is set to false; in that
     * case both strings are brought to a common case before they are counted.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences, 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // UTF-8: upper-casing both sides is enough to ignore the case
        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
12439
12440
    /**
     * Strips $needle from the start of $haystack, ignoring the case while matching.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12472
12473
    /**
     * Get part of a string, with offset and length counted in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters; whatever the underlying "substr()" /
     *                      "mb_substr()" call returns is passed through, which may be
     *                      <b>FALSE</b> on older PHP versions if <i>offset</i> is out of range.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($length === 0 || $str === '') {
            return '';
        }

        // whole string
        if ($length === null && $offset === 0) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // without a length, take "everything up to the end"
            $byte_length = $length ?? 2147483647;

            return \substr($str, $offset, $byte_length);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
12506
12507
    /**
     * Strips $needle from the end of $haystack, ignoring the case while matching.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        $haystack_length = (int) self::strlen($haystack);
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $needle_length);
    }
12539
12540
    /**
     * Strips $needle from the start of $haystack (case-sensitive match).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12572
12573
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, it defaults to
     *                                     the length of the string, i.e. the replacing ends at the end of string;
     *                                     if it is not given and $str is an array, a length of zero is used for
     *                                     every element. If length is zero then this function will have the effect
     *                                     of inserting replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, forwarding the requested encoding to every element
            return \array_map(
                static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                    return self::substr_replace($str_item, $replacement_item, $offset_item, $length_item, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a scalar $str only uses the first replacement
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing / oversized length into a concrete character count
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12728
12729
    /**
     * Strips $needle from the end of $haystack (case-sensitive match).
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // the suffix check itself is done on the raw bytes
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep_length, $encoding);
    }
12778
12779
    /**
     * Returns a case swapped version of the string.
     *
     * The result is built by XOR-ing the lower-cased, the upper-cased and the original
     * string with each other. PHP applies "^" to strings byte by byte, so this relies on
     * all three variants having the same byte layout.
     * NOTE(review): characters whose case mapping changes the byte length would shift
     * the following bytes - confirm whether such input needs to be supported.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // lower ^ upper marks the bits that differ between both cases,
        // XOR-ing that mask onto the input flips every cased character
        return (string) ($lower ^ $upper ^ $str);
    }
12811
12812
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of the "mbstring" or "iconv" extension
     * exists although the extension itself is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_polyfilled || $iconv_polyfilled;
    }
12839
12840
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs converted to spaces.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12860
12861
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without the "keep the string length" option the conversion
     * is done by "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the extended implementation
        $needs_titleize = $lang !== null || $try_to_keep_the_string_length;
        if ($needs_titleize) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
    }
12915
12916
    /**
     * Alias for "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12937
12938
    /**
     * Alias for "UTF8::to_iso8859()": the argument is passed through unchanged.
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12954
12955
    /**
     * Alias for "UTF8::to_iso8859()" (Latin-1): the argument is passed through unchanged.
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12971
12972
    /**
     * Alias for "UTF8::to_utf8()": the argument is passed through unchanged.
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
12988
12989
    /**
     * Convert a string into ASCII by delegating to "ASCII::to_transliterate()".
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
13010
13011
    /**
     * Interprets a scalar value as a boolean.
     *
     * The value is cast to string first. An empty string is false; the words
     * true/1/on/yes and false/0/off/no are matched case-insensitively; any other
     * numeric string is true only if it is greater than zero; everything else
     * is true unless trimming it leaves a falsy string.
     *
     * @param bool|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $known_values = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // every key is lowercase, so one lowercase lookup covers the exact
        // and the case-insensitive match at once
        $known_result = $known_values[\strtolower($value)] ?? null;
        if ($known_result !== null) {
            return $known_result;
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
13054
13055
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * The work is delegated to "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
13078
13079
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * An array is converted element by element (recursively); any other input
     * is cast to string and passed to "UTF8::utf8_decode()".
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            // Write back by key instead of iterating by reference, so that no
            // dangling reference into the returned array is left behind.
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        return self::utf8_decode($str);
    }
13107
13108
    /**
     * Alias for "UTF8::to_iso8859()": the argument is passed through unchanged.
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
13124
13125
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * An array is converted element by element via "UTF8::to_utf8_string()";
     * any other input is passed to "UTF8::to_utf8_string()" directly.
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str)) {
            // Write back by key instead of iterating by reference, so that no
            // dangling reference into the returned array is left behind.
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
            }

            return $str;
        }

        /** @phpstan-var TToUtf8 $str */
        $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $str;
    }
13166
13167
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * The input is scanned byte by byte: 7-bit bytes and byte runs that look like
     * a complete 2-, 3- or 4-byte UTF-8 sequence are copied as they are, every
     * other byte is handed to "UTF8::to_utf8_convert_helper()". Afterwards the
     * "\uXXXX" escape sequences (including surrogate pairs) are decoded.
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the regex replacement fails.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $buf = '';

        for ($pos = 0; $pos < $length; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\x80") {
                // 7-bit byte: it doesn't need conversion
                $buf .= $lead;

                continue;
            }

            // how many continuation bytes does the lead byte announce?
            if ($lead >= "\xC0" && $lead <= "\xDF") {
                $trailing = 1; // looks like 2 bytes UTF8
            } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
                $trailing = 2; // looks like 3 bytes UTF8
            } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
                $trailing = 3; // looks like 4 bytes UTF8
            } else {
                $trailing = 0; // stray continuation byte or 0xF8-0xFF: doesn't look like UTF8
            }

            // all announced bytes must exist and must be continuation bytes (0x80-0xBF)
            $sequence = $lead;
            $is_sequence = $trailing > 0 && $pos + $trailing < $length;
            for ($offset = 1; $is_sequence && $offset <= $trailing; ++$offset) {
                $next = $str[$pos + $offset];
                $is_sequence = ($next & "\xC0") === "\x80";
                $sequence .= $next;
            }

            if ($is_sequence) {
                // yeah, almost sure it's UTF8 already
                $buf .= $sequence;
                $pos += $trailing;
            } else {
                // not valid UTF8 - convert the lead byte only
                $buf .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $code_point = ($high << 10) + $low + 0x10000 - (0xD800 << 10) - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // NOTE(review): the two values are the UTF-8 lead / continuation byte values
                    // of the code point - confirm that "self::chr()" emits them as raw bytes.
                    /** @noinspection UnnecessaryCastingInspection */
                    $first = (string) self::chr(0xC0 | $code_point >> 6);
                    /** @noinspection UnnecessaryCastingInspection */
                    $second = (string) self::chr(0x80 | $code_point & 0x3F);

                    return $first . $second;
                }

                return self::decimal_to_chr($code_point);
            },
            $buf
        );

        if ($buf === null) {
            // the regex engine reported an error
            return '';
        }

        // decode UTF-8 codepoints
        return $decode_html_entity_to_utf8 ? self::html_entity_decode($buf) : $buf;
    }
13300
13301
    /**
     * Casts a numeric string to an integer.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>The integer value, or null if the string isn't numeric.</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
13319
13320
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and do not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        if ($input === null) {
            return null;
        }

        switch (\gettype($input)) {
            case 'string':
            case 'integer':
            case 'double': // gettype() reports floats as "double"
                return (string) $input;

            case 'object':
                // only objects that declare "__toString()" can be cast safely
                if (\method_exists($input, '__toString')) {
                    return (string) $input;
                }

                return null;

            default:
                // bool, array, resource, ...
                return null;
        }
    }
13365
13366
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would produce the malformed character class "[]+",
        // so behave like the native "trim()" and strip nothing.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without "mbstring": the pattern is handed to "UTF8::regex_replace()"
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13412
13413
    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char uppercased.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the string is split
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are sufficient as long as no special handling is requested
        $plain_uppercase = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            $first_char = $plain_uppercase
                ? \mb_strtoupper($first_char)
                : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $first_char . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($plain_uppercase) {
            $first_char = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first_char = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_char . $remainder;
    }
13489
13490
    /**
     * Alias for "UTF8::ucfirst()": the three arguments are passed through unchanged.
     *
     * @param string $str
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13511
13512
    /**
     * Uppercase for all words in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP, but it is valid input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // strip invalid byte sequences before the string is split into words
            $str = self::clean($str);
        }

        // the native function is only an option if neither extra word-chars nor exceptions are given
        // (compared strictly, so that a char list of "0" is not mistaken for "no char list")
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty chunks only: a word like "0" must stay in the result
            if ((string) $word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
13580
13581
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass is always done; with $multi_decode the passes
        // are repeated until a pass no longer changes the string.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
13640
13641
    /**
13642
     * Return a array with "urlencoded"-win1252 -> UTF-8
13643
     *
13644
     * @psalm-pure
13645
     *
13646
     * @return string[]
13647
     *
13648
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
13649
     */
13650 2
    public static function urldecode_fix_win1252_chars(): array
13651
    {
13652
        return [
13653 2
            '%20' => ' ',
13654
            '%21' => '!',
13655
            '%22' => '"',
13656
            '%23' => '#',
13657
            '%24' => '$',
13658
            '%25' => '%',
13659
            '%26' => '&',
13660
            '%27' => "'",
13661
            '%28' => '(',
13662
            '%29' => ')',
13663
            '%2A' => '*',
13664
            '%2B' => '+',
13665
            '%2C' => ',',
13666
            '%2D' => '-',
13667
            '%2E' => '.',
13668
            '%2F' => '/',
13669
            '%30' => '0',
13670
            '%31' => '1',
13671
            '%32' => '2',
13672
            '%33' => '3',
13673
            '%34' => '4',
13674
            '%35' => '5',
13675
            '%36' => '6',
13676
            '%37' => '7',
13677
            '%38' => '8',
13678
            '%39' => '9',
13679
            '%3A' => ':',
13680
            '%3B' => ';',
13681
            '%3C' => '<',
13682
            '%3D' => '=',
13683
            '%3E' => '>',
13684
            '%3F' => '?',
13685
            '%40' => '@',
13686
            '%41' => 'A',
13687
            '%42' => 'B',
13688
            '%43' => 'C',
13689
            '%44' => 'D',
13690
            '%45' => 'E',
13691
            '%46' => 'F',
13692
            '%47' => 'G',
13693
            '%48' => 'H',
13694
            '%49' => 'I',
13695
            '%4A' => 'J',
13696
            '%4B' => 'K',
13697
            '%4C' => 'L',
13698
            '%4D' => 'M',
13699
            '%4E' => 'N',
13700
            '%4F' => 'O',
13701
            '%50' => 'P',
13702
            '%51' => 'Q',
13703
            '%52' => 'R',
13704
            '%53' => 'S',
13705
            '%54' => 'T',
13706
            '%55' => 'U',
13707
            '%56' => 'V',
13708
            '%57' => 'W',
13709
            '%58' => 'X',
13710
            '%59' => 'Y',
13711
            '%5A' => 'Z',
13712
            '%5B' => '[',
13713
            '%5C' => '\\',
13714
            '%5D' => ']',
13715
            '%5E' => '^',
13716
            '%5F' => '_',
13717
            '%60' => '`',
13718
            '%61' => 'a',
13719
            '%62' => 'b',
13720
            '%63' => 'c',
13721
            '%64' => 'd',
13722
            '%65' => 'e',
13723
            '%66' => 'f',
13724
            '%67' => 'g',
13725
            '%68' => 'h',
13726
            '%69' => 'i',
13727
            '%6A' => 'j',
13728
            '%6B' => 'k',
13729
            '%6C' => 'l',
13730
            '%6D' => 'm',
13731
            '%6E' => 'n',
13732
            '%6F' => 'o',
13733
            '%70' => 'p',
13734
            '%71' => 'q',
13735
            '%72' => 'r',
13736
            '%73' => 's',
13737
            '%74' => 't',
13738
            '%75' => 'u',
13739
            '%76' => 'v',
13740
            '%77' => 'w',
13741
            '%78' => 'x',
13742
            '%79' => 'y',
13743
            '%7A' => 'z',
13744
            '%7B' => '{',
13745
            '%7C' => '|',
13746
            '%7D' => '}',
13747
            '%7E' => '~',
13748
            '%7F' => '',
13749
            '%80' => '`',
13750
            '%81' => '',
13751
            '%82' => '‚',
13752
            '%83' => 'ƒ',
13753
            '%84' => '„',
13754
            '%85' => '…',
13755
            '%86' => '†',
13756
            '%87' => '‡',
13757
            '%88' => 'ˆ',
13758
            '%89' => '‰',
13759
            '%8A' => 'Š',
13760
            '%8B' => '‹',
13761
            '%8C' => 'Œ',
13762
            '%8D' => '',
13763
            '%8E' => 'Ž',
13764
            '%8F' => '',
13765
            '%90' => '',
13766
            '%91' => '‘',
13767
            '%92' => '’',
13768
            '%93' => '“',
13769
            '%94' => '”',
13770
            '%95' => '•',
13771
            '%96' => '–',
13772
            '%97' => '—',
13773
            '%98' => '˜',
13774
            '%99' => '™',
13775
            '%9A' => 'š',
13776
            '%9B' => '›',
13777
            '%9C' => 'œ',
13778
            '%9D' => '',
13779
            '%9E' => 'ž',
13780
            '%9F' => 'Ÿ',
13781
            '%A0' => '',
13782
            '%A1' => '¡',
13783
            '%A2' => '¢',
13784
            '%A3' => '£',
13785
            '%A4' => '¤',
13786
            '%A5' => '¥',
13787
            '%A6' => '¦',
13788
            '%A7' => '§',
13789
            '%A8' => '¨',
13790
            '%A9' => '©',
13791
            '%AA' => 'ª',
13792
            '%AB' => '«',
13793
            '%AC' => '¬',
13794
            '%AD' => '',
13795
            '%AE' => '®',
13796
            '%AF' => '¯',
13797
            '%B0' => '°',
13798
            '%B1' => '±',
13799
            '%B2' => '²',
13800
            '%B3' => '³',
13801
            '%B4' => '´',
13802
            '%B5' => 'µ',
13803
            '%B6' => '¶',
13804
            '%B7' => '·',
13805
            '%B8' => '¸',
13806
            '%B9' => '¹',
13807
            '%BA' => 'º',
13808
            '%BB' => '»',
13809
            '%BC' => '¼',
13810
            '%BD' => '½',
13811
            '%BE' => '¾',
13812
            '%BF' => '¿',
13813
            '%C0' => 'À',
13814
            '%C1' => 'Á',
13815
            '%C2' => 'Â',
13816
            '%C3' => 'Ã',
13817
            '%C4' => 'Ä',
13818
            '%C5' => 'Å',
13819
            '%C6' => 'Æ',
13820
            '%C7' => 'Ç',
13821
            '%C8' => 'È',
13822
            '%C9' => 'É',
13823
            '%CA' => 'Ê',
13824
            '%CB' => 'Ë',
13825
            '%CC' => 'Ì',
13826
            '%CD' => 'Í',
13827
            '%CE' => 'Î',
13828
            '%CF' => 'Ï',
13829
            '%D0' => 'Ð',
13830
            '%D1' => 'Ñ',
13831
            '%D2' => 'Ò',
13832
            '%D3' => 'Ó',
13833
            '%D4' => 'Ô',
13834
            '%D5' => 'Õ',
13835
            '%D6' => 'Ö',
13836
            '%D7' => '×',
13837
            '%D8' => 'Ø',
13838
            '%D9' => 'Ù',
13839
            '%DA' => 'Ú',
13840
            '%DB' => 'Û',
13841
            '%DC' => 'Ü',
13842
            '%DD' => 'Ý',
13843
            '%DE' => 'Þ',
13844
            '%DF' => 'ß',
13845
            '%E0' => 'à',
13846
            '%E1' => 'á',
13847
            '%E2' => 'â',
13848
            '%E3' => 'ã',
13849
            '%E4' => 'ä',
13850
            '%E5' => 'å',
13851
            '%E6' => 'æ',
13852
            '%E7' => 'ç',
13853
            '%E8' => 'è',
13854
            '%E9' => 'é',
13855
            '%EA' => 'ê',
13856
            '%EB' => 'ë',
13857
            '%EC' => 'ì',
13858
            '%ED' => 'í',
13859
            '%EE' => 'î',
13860
            '%EF' => 'ï',
13861
            '%F0' => 'ð',
13862
            '%F1' => 'ñ',
13863
            '%F2' => 'ò',
13864
            '%F3' => 'ó',
13865
            '%F4' => 'ô',
13866
            '%F5' => 'õ',
13867
            '%F6' => 'ö',
13868
            '%F7' => '÷',
13869
            '%F8' => 'ø',
13870
            '%F9' => 'ù',
13871
            '%FA' => 'ú',
13872
            '%FB' => 'û',
13873
            '%FC' => 'ü',
13874
            '%FD' => 'ý',
13875
            '%FE' => 'þ',
13876
            '%FF' => 'ÿ',
13877
        ];
13878
    }
13879
13880
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 are replaced by the matching single byte.
     * Two-byte sequences with a larger code point, all three- and four-byte sequences, and a
     * two-byte lead byte that is cut off at the end of the input are replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>If true, the unmodified input is returned whenever the decoded
     *                                result is not shorter than the input (both measured with
     *                                self::strlen()).</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // keep the input around for the "$keep_utf8_chars" comparison below
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // The string is rewritten in place: $i is the read offset, $j the (never larger) write offset.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte selects the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // two-byte sequence
                    if ($i + 1 >= $len) {
                        // lead byte without its continuation byte: do not read past the end of the string
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $lead = self::$ORD[$str[$i] & "\x1F"];
                    $trail = self::$ORD[$str[++$i] & "\x3F"];
                    $c = ($lead << 6) | $trail;
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // four-byte sequence: skip one byte more than the three-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // three-byte (or longer) sequence: outside the single-byte range
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // any other byte is copied unchanged
                    $str[$j] = $str[$i];
            }
        }

        // drop the unused tail that is left over from the in-place rewrite
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13956
13957
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Uses "mb_convert_encoding()" instead of the native "utf8_encode()", because the latter is
     * deprecated as of PHP 8.2. Every ISO-8859-1 byte maps to the code point with the same number,
     * so the result is the same.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - older PHP versions / polyfills may return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        return \is_string($encoded) ? $encoded : '';
    }
13985
13986
    /**
     * Deprecated alias: forwards the input to "UTF8::fix_simple_utf8()" and returns its result.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
14001
14002
    /**
     * Returns the table of all utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  The whitespace characters as values, keyed by the type of whitespace
     *                  as defined in the URL above
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
14017
14018
    /**
     * Limit the number of words in a string.
     *
     * The first "$limit" whitespace-separated words are kept, trailing whitespace is trimmed and
     * "$str_add_on" is appended. If the string has no more than "$limit" words it is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if the input is empty or the limit is smaller than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, followed by 1..$limit words, each with its trailing whitespace
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // nothing matched: hand the input back untouched
        if (!isset($found[0])) {
            return $str;
        }

        // the match already spans the whole string: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($found[0])) {
            return $str;
        }

        return \rtrim($found[0]) . $str_add_on;
    }
14052
14053
    /**
     * Wraps a string to a given number of characters
     *
     * The wrapping positions are computed by the native "wordwrap()" on a placeholder string that
     * holds one single-byte character per element returned by "self::str_split()"; the original
     * characters are then re-assembled according to those positions.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if
     *                "$str" or "$break" is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real pieces, $mask the matching placeholders:
        // '#' for an existing break, ' ' for a space and '?' for everything else.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        // let the native function decide where the breaks ('#') go
        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0; // position inside $chars
        $mask_pos = -1;  // position inside $mask
        $break_pos = -1; // position of the current '#' inside $mask

        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything in front of the current break
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // a break that replaced a space or an existing break consumes that element,
            // a break that was inserted by "$cut" does not
            if (
                $break === $chars[$char_index]
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index]);
                ++$char_index;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed follows the last break
        return $wrapped . \implode('', $chars);
    }
14147
14148
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that each line is wrapped on its own.
     *
     * The wrapped lines are joined again with "$delimiter" (or "\n" when no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // without an explicit delimiter the input is split on any kind of newline
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $tail = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $tail;
    }
14201
14202
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
14214
14215
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * If PCRE has UTF-8 support the check is delegated to "preg_match()" with the /u modifier,
     * otherwise the string is validated byte by byte.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary data that is detected as UTF-16 or UTF-32 is rejected
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // With the /u modifier nothing at all matches if the subject is not valid UTF-8,
            // so being able to match just the first character is enough.
            return \preg_match('/^./us', $str) === 1;
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $pending = 0;         // number of continuation octets still expected
        $code_point = 0;      // code point that is being assembled
        $sequence_length = 1; // total number of octets of the current sequence

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending !== 0) {
                // inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal
                if ((0xC0 & $octet) !== 0x80) {
                    return false;
                }

                $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);
                --$pending;

                if ($pending === 0) {
                    // the sequence is complete, $code_point holds the final value
                    if (
                        // from Unicode 3.1, non-shortest form is illegal
                        ($sequence_length === 2 && $code_point < 0x0080)
                        ||
                        ($sequence_length === 3 && $code_point < 0x0800)
                        ||
                        ($sequence_length === 4 && $code_point < 0x10000)
                        ||
                        // 5 and 6 octet sequences are never legal
                        ($sequence_length > 4)
                        ||
                        // from Unicode 3.2, surrogate characters are illegal
                        (($code_point & 0xFFFFF800) === 0xD800)
                        ||
                        // code points outside the Unicode range are illegal
                        ($code_point > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // reset for the next character
                    $code_point = 0;
                    $sequence_length = 1;
                }

                continue;
            }

            // no sequence in progress: expect US-ASCII or the first octet of a sequence
            if ((0x80 & $octet) === 0) {
                // US-ASCII
                $sequence_length = 1;
            } elseif ((0xE0 & $octet) === 0xC0) {
                // first octet of a 2 octet sequence
                $code_point = ($octet & 0x1F) << 6;
                $pending = 1;
                $sequence_length = 2;
            } elseif ((0xF0 & $octet) === 0xE0) {
                // first octet of a 3 octet sequence
                $code_point = ($octet & 0x0F) << 12;
                $pending = 2;
                $sequence_length = 3;
            } elseif ((0xF8 & $octet) === 0xF0) {
                // first octet of a 4 octet sequence
                $code_point = ($octet & 0x07) << 18;
                $pending = 3;
                $sequence_length = 4;
            } elseif ((0xFC & $octet) === 0xF8) {
                // First octet of a 5 octet sequence: always illegal, but instead of
                // resynchronising we read on and let the check at the end of the
                // sequence reject it.
                $code_point = ($octet & 0x03) << 24;
                $pending = 4;
                $sequence_length = 5;
            } elseif ((0xFE & $octet) === 0xFC) {
                // first octet of a 6 octet sequence, handled like the 5 octet case
                $code_point = ($octet & 1) << 30;
                $pending = 5;
                $sequence_length = 6;
            } else {
                // neither US-ASCII nor a legal first octet
                return false;
            }
        }

        // a sequence that is still open at the end of the string is invalid
        return $pending === 0;
    }
14370
14371
    /**
     * Replaces case variants in a string by means of the case-folding tables.
     *
     * The "upper" / "lower" lists of self::$COMMON_CASE_FOLD are always applied; the data from
     * "caseFolding_full" is applied in addition if "$use_full_case_fold" is set.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>
     *                                   false (default): entries of the "lower" list are replaced by the "upper" ones,
     *                                   true: entries of the "upper" list are replaced by the "lower" ones.
     *                                   </p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;

        if ($use_lowercase) {
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        } else {
            $search = self::$COMMON_CASE_FOLD['lower'];
            $replace = self::$COMMON_CASE_FOLD['upper'];
        }

        $str = \str_replace($search, $replace, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        // the full table is loaded on first use only
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is the search list and index 1 the replacement list for lowercase, swapped otherwise
        $from = $use_lowercase ? 0 : 1;
        $to = $use_lowercase ? 1 : 0;

        return \str_replace($FULL_CASE_FOLD[$from], $FULL_CASE_FOLD[$to], $str);
    }
14424
14425
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns whatever that file returns.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
14443
14444
    /**
     * Fills the emoji caches (keys, values and reversible placeholder keys) on the first call.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they were already filled.</p>
     */
    private static function initEmojiData()
    {
        // already initialized: nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table by key length, longest keys first
        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder per key, built from the crc32 checksum of the key and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14479
14480
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * The check needs the constant "MB_OVERLOAD_STRING" to be defined and its bit to be set in
     * the INI directive "mbstring.func_overload" (deprecated since PHP 7.2).
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
14501
14502
    /**
     * Filters a list of strings and returns the remaining values as a new, re-indexed list.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop values that are empty after "trim()".</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop values whose length ("mb_strlen()") is smaller than or equal
     *                                      to this number; null disables the check.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: the elements are only read, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14543
14544
    /**
     * Builds a regular expression fragment that matches any one of the characters of "$s".
     *
     * The characters are collected in a bracket expression (appended to "$class"); elements that
     * cannot go into the bracket expression are added as alternatives of a non-capturing group.
     * Results are cached per "$s" / "$class" combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the bracket expression.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 is the content of the bracket expression, further entries are alternatives
        /** @var string[] $class_array */
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes first, so that it cannot form a range
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape regex meta characters
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes is added as it is
                $class_array[0] .= $char;
            } else {
                // anything else becomes an alternative of its own
                $class_array[] = $char;
            }
        }

        // an empty (or otherwise falsy, e.g. "0") class is not wrapped in brackets
        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14602
14603
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names on $delimiter and passes every part through self::ucfirst(), except for parts that
     *  - are exactly one of the listed name particles ("de", "van", "von", ...),
     *  - start with one of the listed prefixes ("mc", "mac", "d'", ...), or
     *  - start with one of the listed particles while the delimiter is "-".
     * The parts are then joined again with the same delimiter.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator used for splitting and re-joining.</p>
     * @param string $encoding  <p>Currently not read by this helper.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        foreach ($name_helper_array as &$name) {
            // a part that is exactly a particle stays untouched
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $continue = false;

            // The lists are only read, so they are iterated by value.
            if ($delimiter === '-') {
                foreach ($special_cases['names'] as $beginning) {
                    // byte-wise prefix comparison
                    if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            // once a particle matched, the prefix scan cannot change the outcome
            if (!$continue) {
                foreach ($special_cases['prefixes'] as $beginning) {
                    if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            if ($continue) {
                continue;
            }

            $name = self::ucfirst($name);
        }
        // drop the reference so $name no longer aliases the last array element
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
14703
14704
    /**
     * Fold a string for collation matching: decompose it (Unicode NFD) and remove every
     * run of non-spacing marks (\p{Mn}), so that only the base characters remain.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the normalization failed,
     *                     or null if the regex replacement reported an error.</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports a failure with false
        /** @phpstan-ignore-next-line - https://github.com/JetBrains/phpstorm-stubs/pull/949 */
        if ($decomposed === false) {
            return '';
        }

        /** @noinspection PhpUndefinedClassInspection */
        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
14728
14729
    /**
     * Build the UTF-8 replacement for one input byte.
     *
     * The byte's ordinal is looked up in the lazily loaded "ord" table. If the
     * "win1252_to_utf8" table has an entry for that ordinal, the entry is returned;
     * otherwise a two-byte sequence is assembled with bitwise string operations:
     * lead byte = chr(ordinal / 64) | 0xC0, trail byte = (input & 0x3F) | 0x80.
     *
     * @param int|string $input <p>Presumably a single byte; confirm against the callers.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // the lookup tables are loaded on first use and then kept in static properties
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Truncate explicitly: "/" yields a float whenever the ordinal is not a multiple
            // of 64, and using a fractional float as an array key is deprecated since PHP 8.1.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // bitwise operators on strings work byte by byte; this is intentional, not "&&" / "||"
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
14768
14769
    /**
     * Rewrite "%uXXX" / "%uXXXX" escapes (three or four hex digits) as hexadecimal
     * HTML entities ("&#xXXXX;"). Strings without such an escape are returned unchanged.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap substring probe first: without "%u" there is nothing to rewrite
        $marker_missing = \strpos($str, '%u') === false;
        if ($marker_missing) {
            return $str;
        }

        $escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // "%u" was present, but not followed by enough hex digits
        if (!\preg_match($escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($escape_rx, '&#x\\1;', $str);
    }
14791
}
14792