Passed
Push — master ( 4a5f22...68aebc )
by Lars
03:15
created

UTF8::str_pad_left()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 6
c 1
b 0
f 0
nc 1
nop 4
dl 0
loc 12
ccs 7
cts 7
cp 1
crap 1
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
     * Byte order mark (BOM) => length of that mark in bytes.
     *
     * Every BOM is listed twice: once as its raw byte sequence and once in the
     * form it takes after being mis-read as "WINDOWS-1252" (the byte lengths of
     * those entries show that each non-ASCII character occupies two bytes).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // "ï»¿" written as escaped bytes so the key cannot be lost by editors or tools again
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two leading characters of this key look like plain spaces; a mis-decoded
        // UTF-32 (BE) BOM would start with two NUL bytes - confirm against the canonical file.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        // NOTE(review): same question for the two trailing characters of this key - confirm.
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @phpstan-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @phpstan-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @phpstan-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @phpstan-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @phpstan-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @phpstan-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @phpstan-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @phpstan-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @phpstan-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @phpstan-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @phpstan-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @phpstan-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
     * Creates an instance of the helper.
     *
     * The constructor takes no arguments and performs no work.
     */
    public function __construct()
    {
        // intentionally empty
    }
252
253
    /**
     * Returns the single character found at the given position: $str[1] like
     * functionality, but counted in characters instead of bytes.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of the character to return; a negative position yields
     *                         an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $nothing_to_read = $pos < 0 || $str === '';
        if ($nothing_to_read) {
            return '';
        }

        // The default encoding is served by mb_substr() directly (no explicit encoding argument),
        // every other encoding is delegated to self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
279
280
    /**
     * Returns the string with a UTF-8 BOM in front of it.
     *
     * INFO: If self::string_has_bom() already reports a BOM, the input string is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
302
303
    /**
     * Changes the case of all keys in an array.
     *
     * The values are copied over untouched; only the keys are converted via
     * self::strtolower() / self::strtoupper(). If two keys convert to the same
     * string, the entry that comes later in $array overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other value
     *                                       is treated as <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<int|string, mixed>
     *                  <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // Iterate by value: the values are only copied, so a by-reference loop would
        // merely leave a dangling reference behind without any benefit.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Extracts the text located between the first occurrence of $start (searched
     * from $offset) and the next occurrence of $end after it.
     *
     * An empty string is returned when $start is not found, when $end is not
     * found after $start, or when nothing lies between the two delimiters.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // The default encoding talks to the "mb_" functions directly; anything else is
        // normalized first and then routed through the class' own wrappers.
        $use_mb_directly = $encoding === 'UTF-8';
        if (!$use_mb_directly) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $opening_at = $use_mb_directly
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($opening_at === false) {
            return '';
        }

        $delimiter_length = $use_mb_directly
            ? \mb_strlen($start)
            : self::strlen($start, $encoding);
        $content_from = $opening_at + (int) $delimiter_length;

        $closing_at = $use_mb_directly
            ? \mb_strpos($str, $end, $content_from)
            : self::strpos($str, $end, $content_from, $encoding);
        if ($closing_at === false || $closing_at === $content_from) {
            return '';
        }

        $content = $use_mb_directly
            ? \mb_substr($str, $content_from, $closing_at - $content_from)
            : self::substr($str, $content_from, $closing_at - $content_from, $encoding);

        return (string) $content;
    }
406
407
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * The digits are decoded byte by byte (8 digits each). Leading zeros are
     * insignificant: they are dropped and the remaining digits are left-padded
     * to whole bytes, so '1010' and '00001010' both yield "\n". Characters other
     * than "0" and "1" are ignored.
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes; an empty string for empty, non-string or all-zero input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // Decode digit groups instead of using base_convert(): base_convert() loses precision
        // on long inputs, and an odd number of resulting hex digits made pack('H*') pad the
        // wrong side (e.g. '1010' became "\xa0" instead of "\n").
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8 so that every group is one full byte
        $missing_bits = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing_bits) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byte_bits) {
            $str .= \chr((int) \bindec($byte_bits));
        }

        return $str;
    }
433
434
    /**
     * Returns the byte sequence of the UTF-8 Byte Order Mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
450
451
    /**
     * @alias of UTF8::chr_map()
     *
     * Forwards both arguments unchanged to UTF8::chr_map() and returns its result.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
467
468
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * Unlike UTF8::access() no check is applied to $index before the lookup; it is
     * handed to the substring function as-is.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            $char = self::substr($str, $index, 1, $encoding);
        } else {
            $char = \mb_substr($str, $index, 1);
        }

        return (string) $char;
    }
488
489
    /**
     * Splits the string into its characters via UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
504
505
    /**
     * Detects the UTF-8 related capabilities of the server environment and stores
     * the results in self::$SUPPORT. The detection runs only once; later calls
     * return immediately.
     *
     * @return true|null
     *                   <p>true when the detection ran during this call, null when it had already run.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        $has_mbstring = self::mbstring_loaded();
        self::$SUPPORT['mbstring'] = $has_mbstring;

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if ($has_mbstring === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        $uses_polyfill = self::symfony_polyfill_used();
        self::$SUPPORT['symfony_polyfill_used'] = $uses_polyfill;
        if ($uses_polyfill === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
561
562
    /**
563
     * Generates a UTF-8 encoded character from the given code point.
564
     *
565
     * INFO: opposite to UTF8::ord()
566
     *
567
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
568
     *
569
     * @param int    $code_point <p>The code point for which to generate a character.</p>
570
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
571
     *
572
     * @psalm-pure
573
     *
574
     * @return string|null
575
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
576
     */
577 21
    public static function chr($code_point, string $encoding = 'UTF-8')
578
    {
579
        // init
580
        /**
581
         * @psalm-suppress ImpureStaticVariable
582
         *
583
         * @var array<string,string>
584
         */
585 21
        static $CHAR_CACHE = [];
586
587 21
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
588 5
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
589
        }
590
591
        /** @noinspection InArrayCanBeUsedInspection */
592
        if (
593 21
            $encoding !== 'UTF-8'
594
            &&
595 21
            $encoding !== 'ISO-8859-1'
596
            &&
597 21
            $encoding !== 'WINDOWS-1252'
598
            &&
599 21
            self::$SUPPORT['mbstring'] === false
600
        ) {
601
            /**
602
             * @psalm-suppress ImpureFunctionCall - is is only a warning
603
             */
604
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
605
        }
606
607 21
        if (!\is_int($code_point) || $code_point <= 0) {
0 ignored issues
show
introduced by
The condition is_int($code_point) is always true.
Loading history...
608 5
            return null;
609
        }
610
611 21
        $cache_key = $code_point . '_' . $encoding;
612 21
        if (isset($CHAR_CACHE[$cache_key])) {
613 19
            return $CHAR_CACHE[$cache_key];
614
        }
615
616 10
        if ($code_point <= 0x80) { // only for "simple"-chars
617
618 9
            if (self::$CHR === null) {
619
                self::$CHR = self::getData('chr');
620
            }
621
622
            /**
623
             * @psalm-suppress PossiblyNullArrayAccess
624
             */
625 9
            $chr = self::$CHR[$code_point];
626
627 9
            if ($encoding !== 'UTF-8') {
628 1
                $chr = self::encode($encoding, $chr);
629
            }
630
631 9
            return $CHAR_CACHE[$cache_key] = $chr;
632
        }
633
634
        //
635
        // fallback via "IntlChar"
636
        //
637
638 6
        if (self::$SUPPORT['intlChar'] === true) {
639
            /** @noinspection PhpComposerExtensionStubsInspection */
640 6
            $chr = \IntlChar::chr($code_point);
641
642 6
            if ($encoding !== 'UTF-8') {
643
                $chr = self::encode($encoding, $chr);
644
            }
645
646 6
            return $CHAR_CACHE[$cache_key] = $chr;
647
        }
648
649
        //
650
        // fallback via vanilla php
651
        //
652
653
        if (self::$CHR === null) {
654
            self::$CHR = self::getData('chr');
655
        }
656
657
        $code_point = (int) $code_point;
658
        if ($code_point <= 0x7FF) {
659
            /**
660
             * @psalm-suppress PossiblyNullArrayAccess
661
             */
662
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
663
                   self::$CHR[($code_point & 0x3F) + 0x80];
664
        } elseif ($code_point <= 0xFFFF) {
665
            /**
666
             * @psalm-suppress PossiblyNullArrayAccess
667
             */
668
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
669
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
670
                   self::$CHR[($code_point & 0x3F) + 0x80];
671
        } else {
672
            /**
673
             * @psalm-suppress PossiblyNullArrayAccess
674
             */
675
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
676
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
677
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
678
                   self::$CHR[($code_point & 0x3F) + 0x80];
679
        }
680
681
        if ($encoding !== 'UTF-8') {
682
            $chr = self::encode($encoding, $chr);
683
        }
684
685
        return $CHAR_CACHE[$cache_key] = $chr;
686
    }
687
688
    /**
     * Runs the callback on every character of the string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
708
709
    /**
     * Builds a list with the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Pick the byte counter once, then apply it to every character.
        $byte_counter = '\strlen';
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byte_counter = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        }

        return \array_map($byte_counter, self::str_split($str));
    }
744
745
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * When iconv is available the character is converted to UCS-4LE and read as
     * a 32-bit integer; otherwise the UTF-8 bytes are decoded manually. Only the
     * first character of $char is evaluated.
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the character; 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first byte: without this guard unpack() below fails
        // (which violates the int return type) and the fallback reads $char[0].
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            // unpack('V') needs at least four bytes
            if ($chr_tmp !== false && isset($chr_tmp[3])) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // stop at the end of a truncated sequence instead of reading a missing offset
            if (!isset($char[$i - 1])) {
                break;
            }

            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
797
798
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * An empty string is returned for an empty input; the literal '&#0;' is
     * passed on to UTF8::ord() as an empty string.
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional]
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $normalized = $char === '&#0;' ? '' : (string) $char;
        $code_point = self::ord($normalized);

        return self::int_to_hex($code_point, $prefix);
    }
823
824
    /**
     * alias for "UTF8::chr_to_decimal()"
     *
     * Passes $chr on unchanged and returns whatever UTF8::chr_to_decimal() returns.
     *
     * @param string $chr
     *
     * @psalm-pure
     *
     * @return int
     *
     * @see        UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
840
841
    /**
     * Cuts a string into chunks of $chunk_length characters and joins the chunks
     * with the given line ending.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
859
860
    /**
861
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
862
     *
863
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
864
     *
865
     * @param string $str                                     <p>The string to be sanitized.</p>
866
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
867
     *                                                        UTF-BOM.</p>
868
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
869
     *                                                        whitespace.</p>
870
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
871
     *                                                        Word chars e.g.: "…"
872
     *                                                        => "..."</p>
873
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
874
     *                                                        in
875
     *                                                        combination with
876
     *                                                        $normalize_whitespace</p>
877
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
878
     *                                                        question mark e.g.: "�"</p>
879
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
880
     *                                                        invisible characters e.g.: "\0"</p>
881
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
882
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
883
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
884
     *                                                        </p>
885
     *
886
     * @psalm-pure
887
     *
888
     * @return string
889
     *                <p>A clean UTF-8 encoded string.</p>
890
     *
891
     * @noinspection PhpTooManyParametersInspection
892
     */
893 90
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // Background: http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // An unbounded repetition caused connection resets on larger strings,
        // hence the run of well-formed sequences is capped at {1,100} per match.
        //
        // Group 1 captures a run of byte sequences that look like UTF-8;
        // groups 2 and 3 match a single stray byte each.
        $utf8_sequence_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        // Every match is replaced by its first group only: well-formed runs are
        // kept as they are, stray bytes (groups 2/3) are replaced by nothing.
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($utf8_sequence_pattern, '$1', $str);

        // The optional steps below run in a fixed order; each one is skipped
        // unless its flag is set.
        $str = $replace_diamond_question_mark
            ? self::replace_diamond_question_mark($str)
            : $str;

        $str = $remove_invisible_characters
            ? self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded)
            : $str;

        $str = $normalize_whitespace
            ? self::normalize_whitespace($str, $keep_non_breaking_space)
            : $str;

        $str = $normalize_msword
            ? self::normalize_msword($str)
            : $str;

        return $remove_bom
            ? self::remove_bom($str)
            : $str;
    }
942
943
    /**
944
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
945
     *
946
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
947
     *
948
     * @param string $str <p>The input string.</p>
949
     *
950
     * @psalm-pure
951
     *
952
     * @return string
953
     */
954 33
    public static function cleanup($str): string
    {
        // The parameter is untyped, so force it to a string before anything else.
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // First repair simple ISO <-> UTF-8 errors, then hand over to clean().
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space (only relevant together with $normalize_whitespace)
            true   // $replace_diamond_question_mark
        );
    }
980
981
    /**
982
     * Accepts a string or a array of strings and returns an array of Unicode code points.
983
     *
984
     * INFO: opposite to UTF8::string()
985
     *
986
     * EXAMPLE: <code>
987
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
988
     * // ... OR ...
989
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
990
     * </code>
991
     *
992
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
993
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
994
     *                                     default, code points will be returned as integers.</p>
995
     *
996
     * @psalm-pure
997
     *
998
     * @return int[]|string[]
999
     *                        <p>
1000
     *                        The array of code points:<br>
1001
     *                        int[] for $use_u_style === false<br>
1002
     *                        string[] for $use_u_style === true<br>
1003
     *                        </p>
1004
     */
1005 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is split into its characters first; anything else is
        // taken as it is and validated right below.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        // Map every element through self::ord(); array_map() keeps the keys.
        $code_points = \array_map([self::class, 'ord'], $chars);

        if (!$use_u_style) {
            return $code_points;
        }

        // Convert each integer via self::int_to_hex() for the "U+xxxx" style.
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1042
1043
    /**
1044
     * Trims the string and replaces consecutive whitespace characters with a
1045
     * single space. This includes tabs and newline characters, as well as
1046
     * multibyte whitespace such as the thin space and ideographic space.
1047
     *
1048
     * @param string $str <p>The input string.</p>
1049
     *
1050
     * @psalm-pure
1051
     *
1052
     * @return string
1053
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1054
     */
1055 13
    public static function collapse_whitespace(string $str): string
    {
        // Without mbstring, fall back to the class' own regex helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        // Squash every run of whitespace into one space, then trim both ends.
        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
1064
1065
    /**
1066
     * Returns count of characters used in a string.
1067
     *
1068
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1069
     *
1070
     * @param string $str                     <p>The input string.</p>
1071
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1072
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_*" functions.</p>
1073
     *
1074
     * @psalm-pure
1075
     *
1076
     * @return int[]
1077
     *               <p>An associative array of Character as keys and
1078
     *               their count as values.</p>
1079
     */
1080 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into chunks of length 1, then count how often each one occurs.
        $single_chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($single_chars);
    }
1094
1095
    /**
1096
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1097
     *
1098
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1099
     *
1100
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1101
     *
1102
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1103
     * @param string[] $filter
1104
     * @param bool     $strip_tags
1105
     * @param bool     $strtolower
1106
     *
1107
     * @psalm-pure
1108
     *
1109
     * @return string
1110
     *
1111
     * @phpstan-param array<string,string> $filter
1112
     */
1113 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated identifier, everything
        // else is cleaned first.
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // strtr() would also work here but is much slower than str_replace().
        // To keep '__' as '__', it is parked behind the placeholder '##' while
        // the filter runs - unless the caller defined a filter for '__' itself.
        $placeholder_hits = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $placeholder_hits);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Restore '__' only if the placeholder was actually inserted above.
        if ($placeholder_hits > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Characters kept by the pattern below (everything else is stripped):
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters from U+00A1 up to U+FFFF
        $identifier = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $identifier = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $identifier);

        return \trim($identifier, '-');
    }
1170
1171
    /**
1172
     * Remove css media-queries.
1173
     *
1174
     * @param string $str
1175
     *
1176
     * @psalm-pure
1177
     *
1178
     * @return string
1179
     */
1180 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches "@media ... { ... } }" blocks. Flags: case-insensitive (i),
        // dot matches newlines (s), UTF-8 (u), multiline (m), ungreedy (U).
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        // preg_replace() returns null on error; the cast turns that into ''.
        return (string) $without_media_queries;
    }
1188
1189
    /**
1190
     * Checks whether ctype is available on the server.
1191
     *
1192
     * @psalm-pure
1193
     *
1194
     * @return bool
1195
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
1196
     *
1197
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
1198
     */
1199
    public static function ctype_loaded(): bool
    {
        // Ask PHP directly whether the "ctype" extension is loaded.
        $is_ctype_available = \extension_loaded('ctype');

        return $is_ctype_available;
    }
1203
1204
    /**
1205
     * Converts an int value into a UTF-8 character.
1206
     *
1207
     * INFO: opposite to UTF8::string()
1208
     *
1209
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1210
     *
1211
     * @param int|string $int
1212
     *
1213
     * @phpstan-param int|numeric-string $int
1214
     *
1215
     * @psalm-pure
1216
     *
1217
     * @return string
1218
     */
1219 20
    public static function decimal_to_chr($int): string
    {
        // Build a numeric HTML entity (e.g. "&#931;") and let the entity
        // decoder turn it into the character.
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1223
1224
    /**
1225
     * Decodes a MIME header field
1226
     *
1227
     * @param string $str
1228
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1229
     *
1230
     * @psalm-pure
1231
     *
1232
     * @return false|string
1233
     *                      <p>A decoded MIME field on success,
1234
     *                      or false if an error occurs during the decoding.</p>
1235
     */
1236 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // 'UTF-8' and 'CP850' are used as given; every other name is normalized.
        $is_known_encoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode(
            $str,
            \ICONV_MIME_DECODE_CONTINUE_ON_ERROR,
            $encoding
        );
    }
1245
1246
    /**
1247
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1248
     *
1249
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1250
     *
1251
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1252
     *
1253
     * @return string
1254
     *                <p>Emoji or empty string on error.</p>
1255
     */
1256 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only exactly two ASCII letters form a valid ISO 3166-1 alpha-2 code.
        // Anything else (empty string, wrong length, digits, punctuation,
        // multibyte characters) is an error and yields the documented ''.
        // Checking only the character count is not enough: the byte-wise
        // indexing below would then produce non-flag or invalid output.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code = \strtoupper($country_code_iso_3166_1);

        // "A" (0x41) maps to U+1F1E6, the first regional indicator symbol;
        // the remaining letters follow at the same distance.
        $flag_offset = 0x1F1E6;
        $ascii_offset = 0x41;

        $emoji = '';
        foreach ([$country_code[0], $country_code[1]] as $letter) {
            // Both bytes are validated ASCII letters, so the native ord() is safe here.
            $emoji .= (self::chr(\ord($letter) - $ascii_offset + $flag_offset) ?? '');
        }

        return $emoji;
    }
1274
1275
    /**
1276
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1277
     *
1278
     * INFO: opposite to UTF8::emoji_encode()
1279
     *
1280
     * EXAMPLE: <code>
1281
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1282
     * //
1283
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1284
     * </code>
1285
     *
1286
     * @param string $str                            <p>The input string.</p>
1287
     * @param bool   $use_reversible_string_mappings [optional] <p>
1288
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1289
     *                                               between "emoji_encode" and "emoji_decode".</p>
1290
     *
1291
     * @psalm-pure
1292
     *
1293
     * @return string
1294
     */
1295 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Make sure the emoji caches are populated before reading them.
        self::initEmojiData();

        // Only the set of search keys differs between the two modes;
        // the replacement values are the same cache in both cases.
        $encoded_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1315
1316
    /**
1317
     * Encode a string with emoji chars into a non-emoji string.
1318
     *
1319
     * INFO: opposite to UTF8::emoji_decode()
1320
     *
1321
     * EXAMPLE: <code>
1322
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
1323
     * //
1324
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1325
     * </code>
1326
     *
1327
     * @param string $str                            <p>The input string</p>
1328
     * @param bool   $use_reversible_string_mappings [optional] <p>
1329
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1330
     *                                               between "emoji_encode" and "emoji_decode"</p>
1331
     *
1332
     * @psalm-pure
1333
     *
1334
     * @return string
1335
     */
1336 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Make sure the emoji caches are populated before reading them.
        self::initEmojiData();

        // The search values are the same cache in both modes;
        // only the replacement keys depend on the chosen mapping.
        $replacement_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            $replacement_keys,
            $str
        );
    }
1356
1357
    /**
1358
     * Encode a string with a new charset-encoding.
1359
     *
1360
     * INFO:  This function will also try to fix broken / double encoding,
1361
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1362
     *
1363
     * EXAMPLE: <code>
1364
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1365
     * //
1366
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1367
     * //
1368
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1369
     * //
1370
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1371
     * </code>
1372
     *
1373
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1374
     * @param string $str                           <p>The input string</p>
1375
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the encoding of the input string is
     *                                              auto-detected (a detected encoding overrides $from_encoding);<br>
     *                                              when false, $from_encoding is used unless it is empty.</p>
1378
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1379
     *                                              An empty string will trigger the autodetect anyway.</p>
1380
     *
1381
     * @psalm-pure
1382
     *
1383
     * @return string
1384
     *
1385
     * @psalm-suppress InvalidReturnStatement
1386
     */
1387 29
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target given: return the input untouched.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are used as given; other names are normalized first.
        if (!\in_array($to_encoding, ['UTF-8', 'CP850'], true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, ['UTF-8', 'CP850'], true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target are known and identical: no conversion needed.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: JSON, BASE64, HTML-ENTITIES -------------------
        // As a target they return immediately; as a source they are decoded
        // and the source encoding is reset so that it gets detected below.

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): base64_decode() in strict mode returns false for
            // invalid input; that case is not handled separately here.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // --- source encoding detection ---------------------------------------

        // Detect when asked to, or when no source encoding is known.
        $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // --- dedicated converters for the common pairs -----------------------

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion: mbstring first, iconv as fallback -----------

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $mb_converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // Only a truthy result is accepted; otherwise iconv gets a try.
            if ($mb_converted) {
                \assert(\is_string($mb_converted));

                return $mb_converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $iconv_converted = @\iconv($from_encoding, $to_encoding, $str);

        // If iconv fails as well, return the (possibly pre-decoded) input.
        return $iconv_converted !== false ? $iconv_converted : $str;
    }
1535
1536
    /**
1537
     * @param string $str
1538
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1539
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1540
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1541
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1542
     * @param int    $indent            [optional] <p>Set the maximum line length (passed on as "line-length").</p>
1543
     *
1544
     * @psalm-pure
1545
     *
1546
     * @return false|string
1547
     *                      <p>An encoded MIME field on success,
1548
     *                      or false if an error occurs during the encoding.</p>
1549
     */
1550 1
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are used as given; other charset names are normalized.
        if (!\in_array($from_charset, ['UTF-8', 'CP850'], true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, ['UTF-8', 'CP850'], true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // The field name is left empty, only the value is encoded.
        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1579
1580
    /**
1581
     * Create an extract from a sentence, so if the search-string was found, we try to center it in the output.
1582
     *
1583
     * @param string   $str                       <p>The input string.</p>
1584
     * @param string   $search                    <p>The searched string.</p>
1585
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1586
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1587
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1588
     *
1589
     * @psalm-pure
1590
     *
1591
     * @return string
1592
     */
1593 1
    public static function extract_text(
1594
        string $str,
1595
        string $search = '',
1596
        int $length = null,
1597
        string $replacer_for_skipped_text = '…',
1598
        string $encoding = 'UTF-8'
1599
    ): string {
1600 1
        if ($str === '') {
1601 1
            return '';
1602
        }
1603
1604 1
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
1605
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
1606
        }
1607
1608 1
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
1609
1610 1
        if ($length === null) {
1611 1
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
1612
        }
1613
1614 1
        if ($search === '') {
1615 1
            if ($encoding === 'UTF-8') {
1616 1
                if ($length > 0) {
1617 1
                    $string_length = (int) \mb_strlen($str);
1618 1
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1619
                } else {
1620 1
                    $end = 0;
1621
                }
1622
1623 1
                $pos = (int) \min(
1624 1
                    \mb_strpos($str, ' ', $end),
1625 1
                    \mb_strpos($str, '.', $end)
1626
                );
1627
            } else {
1628
                if ($length > 0) {
1629
                    $string_length = (int) self::strlen($str, $encoding);
1630
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1631
                } else {
1632
                    $end = 0;
1633
                }
1634
1635
                $pos = (int) \min(
1636
                    self::strpos($str, ' ', $end, $encoding),
1637
                    self::strpos($str, '.', $end, $encoding)
1638
                );
1639
            }
1640
1641 1
            if ($pos) {
1642 1
                if ($encoding === 'UTF-8') {
1643 1
                    $str_sub = \mb_substr($str, 0, $pos);
1644
                } else {
1645
                    $str_sub = self::substr($str, 0, $pos, $encoding);
1646
                }
1647
1648 1
                if ($str_sub === false) {
1649
                    return '';
1650
                }
1651
1652 1
                return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1653
            }
1654
1655
            return $str;
1656
        }
1657
1658 1
        if ($encoding === 'UTF-8') {
1659 1
            $word_position = (int) \mb_stripos($str, $search);
1660 1
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
1661
        } else {
1662
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
1663
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
1664
        }
1665
1666 1
        $pos_start = 0;
1667 1
        if ($half_side > 0) {
1668 1
            if ($encoding === 'UTF-8') {
1669 1
                $half_text = \mb_substr($str, 0, $half_side);
1670
            } else {
1671
                $half_text = self::substr($str, 0, $half_side, $encoding);
1672
            }
1673 1
            if ($half_text !== false) {
1674 1
                if ($encoding === 'UTF-8') {
1675 1
                    $pos_start = (int) \max(
1676 1
                        \mb_strrpos($half_text, ' '),
1677 1
                        \mb_strrpos($half_text, '.')
1678
                    );
1679
                } else {
1680
                    $pos_start = (int) \max(
1681
                        self::strrpos($half_text, ' ', 0, $encoding),
1682
                        self::strrpos($half_text, '.', 0, $encoding)
1683
                    );
1684
                }
1685
            }
1686
        }
1687
1688 1
        if ($word_position && $half_side > 0) {
1689 1
            $offset = $pos_start + $length - 1;
1690 1
            $real_length = (int) self::strlen($str, $encoding);
1691
1692 1
            if ($offset > $real_length) {
1693
                $offset = $real_length;
1694
            }
1695
1696 1
            if ($encoding === 'UTF-8') {
1697 1
                $pos_end = (int) \min(
1698 1
                    \mb_strpos($str, ' ', $offset),
1699 1
                    \mb_strpos($str, '.', $offset)
1700 1
                ) - $pos_start;
1701
            } else {
1702
                $pos_end = (int) \min(
1703
                    self::strpos($str, ' ', $offset, $encoding),
1704
                    self::strpos($str, '.', $offset, $encoding)
1705
                ) - $pos_start;
1706
            }
1707
1708 1
            if (!$pos_end || $pos_end <= 0) {
1709 1
                if ($encoding === 'UTF-8') {
1710 1
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
1711
                } else {
1712
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
1713
                }
1714 1
                if ($str_sub !== false) {
1715 1
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
1716
                } else {
1717 1
                    $extract = '';
1718
                }
1719
            } else {
1720 1
                if ($encoding === 'UTF-8') {
1721 1
                    $str_sub = \mb_substr($str, $pos_start, $pos_end);
1722
                } else {
1723
                    $str_sub = self::substr($str, $pos_start, $pos_end, $encoding);
1724
                }
1725 1
                if ($str_sub !== false) {
1726 1
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1727
                } else {
1728 1
                    $extract = '';
1729
                }
1730
            }
1731
        } else {
1732 1
            $offset = $length - 1;
1733 1
            $true_length = (int) self::strlen($str, $encoding);
1734
1735 1
            if ($offset > $true_length) {
1736
                $offset = $true_length;
1737
            }
1738
1739 1
            if ($encoding === 'UTF-8') {
1740 1
                $pos_end = (int) \min(
1741 1
                    \mb_strpos($str, ' ', $offset),
1742 1
                    \mb_strpos($str, '.', $offset)
1743
                );
1744
            } else {
1745
                $pos_end = (int) \min(
1746
                    self::strpos($str, ' ', $offset, $encoding),
1747
                    self::strpos($str, '.', $offset, $encoding)
1748
                );
1749
            }
1750
1751 1
            if ($pos_end) {
1752 1
                if ($encoding === 'UTF-8') {
1753 1
                    $str_sub = \mb_substr($str, 0, $pos_end);
1754
                } else {
1755
                    $str_sub = self::substr($str, 0, $pos_end, $encoding);
1756
                }
1757 1
                if ($str_sub !== false) {
1758 1
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1759
                } else {
1760 1
                    $extract = '';
1761
                }
1762
            } else {
1763 1
                $extract = $str;
1764
            }
1765
        }
1766
1767 1
        return $extract;
1768
    }
1769
1770
    /**
1771
     * Reads entire file into a string.
1772
     *
1773
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
1774
     *
1775
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1776
     *
1777
     * @see http://php.net/manual/en/function.file-get-contents.php
1778
     *
1779
     * @param string        $filename         <p>
1780
     *                                        Name of the file to read.
1781
     *                                        </p>
1782
     * @param bool          $use_include_path [optional] <p>
1783
     *                                        Prior to PHP 5, this parameter is called
1784
     *                                        use_include_path and is a bool.
1785
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1786
     *                                        to trigger include path
1787
     *                                        search.
1788
     *                                        </p>
1789
     * @param resource|null $context          [optional] <p>
1790
     *                                        A valid context resource created with
1791
     *                                        stream_context_create. If you don't need to use a
1792
     *                                        custom context, you can skip this parameter by &null;.
1793
     *                                        </p>
1794
     * @param int|null      $offset           [optional] <p>
1795
     *                                        The offset where the reading starts.
1796
     *                                        </p>
1797
     * @param int|null      $max_length       [optional] <p>
1798
     *                                        Maximum length of data read. The default is to read until end
1799
     *                                        of file is reached.
1800
     *                                        </p>
1801
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1802
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1803
     *                                        some files, because they used non default utf-8 chars. Binary files
1804
     *                                        like images or pdf will not be converted.</p>
1805
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1806
     *                                        An empty string will trigger the autodetect anyway.</p>
1807
     *
1808
     * @psalm-pure
1809
     *
1810
     * @return false|string
1811
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1812
     *
1813
     * @noinspection PhpTooManyParametersInspection
1814
     */
1815 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the given path; give up when the filter itself fails.
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the supported PHP range.
        $sanitized_filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($sanitized_filename === false) {
            return false;
        }

        // Build a default stream context carrying the timeout (under the "http" key)
        // only when the caller supplied no context and the timeout is non-zero.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // A missing offset means "read from the beginning".
        $start = $offset ?? 0;

        // Only forward the length argument when the caller actually provided one.
        $data = \is_int($max_length)
            ? \file_get_contents($sanitized_filename, $use_include_path, $context, $start, $max_length)
            : \file_get_contents($sanitized_filename, $use_include_path, $context, $start);

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Convert when the data is not detected as binary, or when it is detected
        // as UTF-16 / UTF-32.
        $needs_conversion = !self::is_binary($data, true)
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($needs_conversion) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1872
1873
    /**
1874
     * Checks if a file starts with BOM (Byte Order Mark) character.
1875
     *
1876
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
1877
     *
1878
     * @param string $file_path <p>Path to a valid file.</p>
1879
     *
1880
     * @throws \RuntimeException if file_get_contents() returned false
1881
     *
1882
     * @return bool
1883
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1884
     *
1885
     * @psalm-pure
1886
     */
1887 2
    public static function file_has_bom(string $file_path): bool
    {
        // Load the file content; a failed read is reported as an exception.
        $raw_content = \file_get_contents($file_path);

        if ($raw_content !== false) {
            // Delegate the actual BOM detection to the string-based check.
            return self::string_has_bom($raw_content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1896
1897
    /**
1898
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1899
     *
1900
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
1901
     *
1902
     * @param array|object|string $var
1903
     * @param int                 $normalization_form
1904
     * @param string              $leading_combining
1905
     *
1906
     * @psalm-pure
1907
     *
1908
     * @return mixed
1909
     *
1910
     * @template TFilter
1911
     * @phpstan-param TFilter $var
1912
     * @phpstan-return TFilter
1913
     */
1914 65
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // Filter every element (or iterable property) in place, recursively.
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // Pure ASCII needs neither normalization nor re-encoding.
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // Non-empty marker: the string was already in the requested form.
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // Fall back to a plain re-encode when normalization yields nothing usable.
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $has_leading_combining_mark = $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($has_leading_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }
        // Every other type is returned untouched.

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1972
1973
    /**
1974
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1975
     *
1976
     * Gets a specific external variable by name and optionally filters it.
1977
     *
1978
     * EXAMPLE: <code>
1979
     * // _GET['foo'] = 'bar';
1980
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
1981
     * </code>
1982
     *
1983
     * @see http://php.net/manual/en/function.filter-input.php
1984
     *
1985
     * @param int            $type          <p>
1986
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1987
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1988
     *                                      <b>INPUT_ENV</b>.
1989
     *                                      </p>
1990
     * @param string         $variable_name <p>
1991
     *                                      Name of a variable to get.
1992
     *                                      </p>
1993
     * @param int            $filter        [optional] <p>
1994
     *                                      The ID of the filter to apply. The
1995
     *                                      manual page lists the available filters.
1996
     *                                      </p>
1997
     * @param int|int[]|null $options       [optional] <p>
1998
     *                                      Associative array of options or bitwise disjunction of flags. If filter
1999
     *                                      accepts options, flags can be provided in "flags" field of array.
2000
     *                                      </p>
2001
     *
2002
     * @psalm-pure
2003
     *
2004
     * @return mixed
2005
     *               <p>
2006
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
2007
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
2008
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
2009
     *               </p>
2010
     */
2011 1
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // An omitted $options arrives here as null as well, so a single null
        // check decides whether the native call receives the fourth argument.
        $value = $options === null
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($value);
    }
2028
2029
    /**
2030
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2031
     *
2032
     * Gets external variables and optionally filters them.
2033
     *
2034
     * EXAMPLE: <code>
2035
     * // _GET['foo'] = 'bar';
2036
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
2037
     * </code>
2038
     *
2039
     * @see http://php.net/manual/en/function.filter-input-array.php
2040
     *
2041
     * @param int        $type       <p>
2042
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
2043
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
2044
     *                               <b>INPUT_ENV</b>.
2045
     *                               </p>
2046
     * @param array|int|null $definition [optional] <p>
2047
     *                               An array defining the arguments. A valid key is a string
2048
     *                               containing a variable name and a valid value is either a filter type, or an array
2049
     *                               optionally specifying the filter, flags and options. If the value is an
2050
     *                               array, valid keys are filter which specifies the
2051
     *                               filter type,
2052
     *                               flags which specifies any flags that apply to the
2053
     *                               filter, and options which specifies any options that
2054
     *                               apply to the filter. See the example below for a better understanding.
2055
     *                               </p>
2056
     *                               <p>
2057
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
2058
     *                               input array are filtered by this filter.
2059
     *                               </p>
2060
     * @param bool       $add_empty  [optional] <p>
2061
     *                               Add missing keys as <b>NULL</b> to the return value.
2062
     *                               </p>
2063
     *
2064
     * @psalm-pure
2065
     *
2066
     * @return mixed
2067
     *               <p>
2068
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2069
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2070
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
2071
     *               is not set and <b>NULL</b> if the filter fails.
2072
     *               </p>
2073
     */
2074 1
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // An omitted $definition arrives here as null as well, so a single null
        // check selects the one-argument form of the native call.
        $values = $definition === null
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($values);
    }
2090
2091
    /**
2092
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2093
     *
2094
     * Filters a variable with a specified filter.
2095
     *
2096
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
2097
     *
2098
     * @see http://php.net/manual/en/function.filter-var.php
2099
     *
2100
     * @param float|int|string|null $variable <p>
2101
     *                                        Value to filter.
2102
     *                                        </p>
2103
     * @param int                   $filter   [optional] <p>
2104
     *                                        The ID of the filter to apply. The
2105
     *                                        manual page lists the available filters.
2106
     *                                        </p>
2107
     * @param int|int[]|null        $options  [optional] <p>
2108
     *                                        Associative array of options or bitwise disjunction of flags. If filter
2109
     *                                        accepts options, flags can be provided in "flags" field of array. For
2110
     *                                        the "callback" filter, callable type should be passed. The
2111
     *                                        callback must accept one argument, the value to be filtered, and return
2112
     *                                        the value after filtering/sanitizing it.
2113
     *                                        </p>
2114
     *                                        <p>
2115
     *                                        <code>
2116
     *                                        // for filters that accept options, use this format
2117
     *                                        $options = array(
2118
     *                                        'options' => array(
2119
     *                                        'default' => 3, // value to return if the filter fails
2120
     *                                        // other options here
2121
     *                                        'min_range' => 0
2122
     *                                        ),
2123
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
2124
     *                                        );
2125
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
2126
     *                                        // for filter that only accept flags, you can pass them directly
2127
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
2128
     *                                        // for filter that only accept flags, you can also pass as an array
2129
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
2130
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
2131
     *                                        // callback validate filter
2132
     *                                        function foo($value)
2133
     *                                        {
2134
     *                                        // Expected format: Surname, GivenNames
2135
     *                                        if (strpos($value, ", ") === false) return false;
2136
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
2137
     *                                        $empty = (empty($surname) || empty($givennames));
2138
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
2139
     *                                        if ($empty || $notstrings) {
2140
     *                                        return false;
2141
     *                                        } else {
2142
     *                                        return $value;
2143
     *                                        }
2144
     *                                        }
2145
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
2146
     *                                        </code>
2147
     *                                        </p>
2148
     *
2149
     * @psalm-pure
2150
     *
2151
     * @return mixed
2152
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
2153
     */
2154 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Treat null (omitted OR passed explicitly) as "no options" and call the
        // native function without the third argument. PHP 8 declares that
        // argument as array|int, so forwarding null from this strict_types file
        // would raise a TypeError. This mirrors the null guard in filter_input().
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        // Normalize the filtered result (strings, arrays and objects) to UTF-8 NFC.
        return self::filter($variable);
    }
2170
2171
    /**
2172
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2173
     *
2174
     * Gets multiple variables and optionally filters them.
2175
     *
2176
     * EXAMPLE: <code>
2177
     * $filters = [
2178
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2179
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2180
     *     'email' => FILTER_VALIDATE_EMAIL,
2181
     * ];
2182
     *
2183
     * $data = [
2184
     *     'name' => 'κόσμε',
2185
     *     'age' => '18',
2186
     *     'email' => 'foo@example.com'
2187
     * ];
2188
     *
2189
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@example.com']
2190
     * </code>
2191
     *
2192
     * @see http://php.net/manual/en/function.filter-var-array.php
2193
     *
2194
     * @param array<mixed>   $data       <p>
2195
     *                                   An array with string keys containing the data to filter.
2196
     *                                   </p>
2197
     * @param array|int|null $definition [optional] <p>
2198
     *                                   An array defining the arguments. A valid key is a string
2199
     *                                   containing a variable name and a valid value is either a
2200
     *                                   filter type, or an
2201
     *                                   array optionally specifying the filter, flags and options.
2202
     *                                   If the value is an array, valid keys are filter
2203
     *                                   which specifies the filter type,
2204
     *                                   flags which specifies any flags that apply to the
2205
     *                                   filter, and options which specifies any options that
2206
     *                                   apply to the filter. See the example below for a better understanding.
2207
     *                                   </p>
2208
     *                                   <p>
2209
     *                                   This parameter can be also an integer holding a filter constant. Then all values
2210
     *                                   in the input array are filtered by this filter.
2211
     *                                   </p>
2212
     * @param bool           $add_empty  [optional] <p>
2213
     *                                   Add missing keys as <b>NULL</b> to the return value.
2214
     *                                   </p>
2215
     *
2216
     * @psalm-pure
2217
     *
2218
     * @return mixed
2219
     *               <p>
2220
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2221
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2222
     *               set.
2223
     *               </p>
2224
     */
2225 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // Treat null (omitted OR passed explicitly) as "no definition" and let the
        // native function apply its default filter. PHP 8 declares the second
        // argument as array|int, so forwarding null from this strict_types file
        // would raise a TypeError. This mirrors the null guard in
        // filter_input_array().
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        // Normalize every filtered value to UTF-8 NFC.
        return self::filter($a);
    }
2241
2242
    /**
2243
     * Checks whether finfo is available on the server.
2244
     *
2245
     * @psalm-pure
2246
     *
2247
     * @return bool
2248
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2249
     *
2250
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2251
     */
2252
    public static function finfo_loaded(): bool
    {
        // "\finfo::class" is resolved at compile time to the plain string 'finfo',
        // so this is only an existence check and never requires the class itself.
        $finfo_class = \finfo::class;

        return \class_exists($finfo_class);
    }
2256
2257
    /**
2258
     * Returns the first $n characters of the string.
2259
     *
2260
     * @param string $str      <p>The input string.</p>
2261
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2262
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2263
     *
2264
     * @psalm-pure
2265
     *
2266
     * @return string
2267
     */
2268 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to extract from an empty string or for a non-positive count.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; other encodings use the class' own substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2283
2284
    /**
2285
     * Check if the number of Unicode characters isn't greater than the specified integer.
2286
     *
2287
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true (5 characters fit inside 6)</code>
2288
     *
2289
     * @param string $str      the original string to be checked
2290
     * @param int    $box_size the size in number of chars to be checked against string
2291
     *
2292
     * @psalm-pure
2293
     *
2294
     * @return bool
2295
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2296
     */
2297 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by the class' own strlen(), cast to int before comparing.
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2301
2302
    /**
2303
     * Try to fix simple broken UTF-8 strings.
2304
     *
2305
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2306
     *
2307
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
2308
     *
2309
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2310
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2311
     * See: http://en.wikipedia.org/wiki/Windows-1252
2312
     *
2313
     * @param string $str <p>The input string</p>
2314
     *
2315
     * @psalm-pure
2316
     *
2317
     * @return string
2318
     */
2319 47
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Search strings (the keys of the fix table), built once per process.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * Replacement strings (the fix table itself), built once per process.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        if ($search_cache === null) {
            // Load the fix table lazily and keep it on the class for later use.
            $fix_table = self::$BROKEN_UTF8_FIX;
            if ($fix_table === null) {
                $fix_table = self::getData('utf8_fix');
                self::$BROKEN_UTF8_FIX = $fix_table;
            }

            $search_cache = \array_keys($fix_table);
            $replace_cache = $fix_table;
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }
2352
2353
    /**
2354
     * Fix a double (or multiple) encoded UTF8 string.
2355
     *
2356
     * EXAMPLE: <code>UTF8::fix_utf8('FÃƒÂ©dÃƒÂ©ration'); // 'Fédération'</code>
2357
     *
2358
     * @param string|string[] $str you can use a string or an array of strings
2359
     *
2360
     * @psalm-pure
2361
     *
2362
     * @return string|string[]
2363
     *                         Will return the fixed input-"array" or
2364
     *                         the fixed input-"string"
2365
     *
2366
     * @psalm-suppress InvalidReturnType
2367
     */
2368 2
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // Fix every entry recursively, keeping the original keys.
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // Decode and re-encode repeatedly until a pass no longer changes the
        // string; an empty string therefore skips the loop entirely.
        for ($previous = ''; $previous !== $str;) {
            $previous = $str;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8(
                self::utf8_decode($str, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2399
2400
    /**
2401
     * Get the text direction ('LTR' or 'RTL') of a specific character.
2402
     *
2403
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2404
     *
2405
     * @param string $char
2406
     *
2407
     * @psalm-pure
2408
     *
2409
     * @return string
2410
     *                <p>'RTL' or 'LTR'.</p>
2411
     */
2412 2
    public static function getCharDirection(string $char): string
    {
        // First choice: let "IntlChar" classify the character, if it is available.
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction_code = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($direction_code, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($direction_code, $rtl_codes, true)) {
                return 'RTL';
            }

            // every other code is decided by the code point tables below
        }

        $code_point = static::chr_to_decimal($char);

        // Code points outside of 0x5be..0x10b7f are always reported as "LTR".
        if ($code_point < 0x5be || $code_point > 0x10b7f) {
            return 'LTR';
        }

        if ($code_point <= 0x85e) {
            $rtl_singles = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            $rtl_ranges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];
        } elseif ($code_point === 0x200f) {
            return 'RTL';
        } elseif ($code_point >= 0xfb1d) {
            $rtl_singles = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            $rtl_ranges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // between 0x85f and 0xfb1c only 0x200f is listed as "RTL"
            return 'LTR';
        }

        if (\in_array($code_point, $rtl_singles, true)) {
            return 'RTL';
        }

        foreach ($rtl_ranges as $range) {
            if ($code_point >= $range[0] && $code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2518
2519
    /**
2520
     * Check for php-support.
2521
     *
2522
     * @param string|null $key
2523
     *
2524
     * @psalm-pure
2525
     *
2526
     * @return mixed
2527
     *               Return the full support-"array", if $key === null<br>
2528
     *               return bool-value, if $key is used and available<br>
2529
     *               otherwise return <strong>null</strong>
2530
     */
2531 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Fetch the transliterator list once and keep it in the static cache.
            $transliterator_list = self::$INTL_TRANSLITERATOR_LIST;
            if ($transliterator_list === null) {
                $transliterator_list = self::getData('transliterator_list');
                self::$INTL_TRANSLITERATOR_LIST = $transliterator_list;
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = $transliterator_list;

            // unknown (or null-valued) keys yield null
            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        // no key given: hand back the complete support-"array"
        return self::$SUPPORT;
    }
2545
2546
    /**
2547
     * Warning: this method only works for some file-types (png, jpg)
2548
     *          if you need more supported types, please use e.g. "finfo"
2549
     *
2550
     * @param string $str
2551
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
2552
     *
2553
     * @psalm-pure
2554
     *
2555
     * @return null[]|string[]
2556
     *                         <p>with these keys: 'ext', 'mime', 'type'</p>
2557
     *
2558
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
2559
     */
2560 40
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // The detection only looks at the first two bytes of the input.
        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \substr($str, 0, 2);
        if ($head === false || \strlen($head) !== 2) {
            return $fallback;
        }

        /** @var array|false $bytes - needed for PhpStan (stubs error) */
        $bytes = \unpack('C2chars', $head);
        if ($bytes === false) {
            return $fallback;
        }

        // Both byte values are glued together as decimal text, e.g. 255 and 216 => 255216.
        /** @noinspection OffsetOperationsInspection */
        $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        if (isset($known_types[$type_code])) {
            return $known_types[$type_code];
        }

        return $fallback;
    }
2627
2628
    /**
2629
     * @param int    $length         <p>Length of the random string.</p>
2630
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2631
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2632
     *
2633
     * @return string
2634
     */
2635 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The literal "UTF-8" uses the plain "mb_" functions; every other value is
        // normalized first and then handled by this class' own strlen() / substr().
        $use_plain_mb = $encoding === 'UTF-8';
        if ($use_plain_mb) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        // nothing to pick from
        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $offset = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $offset = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_plain_mb
                ? \mb_substr($possible_chars, $offset, 1)
                : self::substr($possible_chars, $offset, 1, $encoding);

            // only a successfully extracted char counts towards the requested length
            if ($char === false) {
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2692
2693
    /**
2694
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2695
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2696
     *
2697
     * @return string
2698
     */
2699 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_number = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_number = \mt_rand(0, \mt_getrandmax());
        }

        // Collect everything that is mixed into the identifier, in a fixed order.
        $seed = \implode(
            '',
            [
                $random_number,
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $extra_entropy,
            ]
        );

        // the seed is used as prefix for "uniqid()"
        $unique_string = \uniqid($seed, true);

        if (!$use_md5) {
            return $unique_string;
        }

        return \md5($unique_string . $seed);
    }
2722
2723
    /**
2724
     * alias for "UTF8::string_has_bom()"
2725
     *
2726
     * @param string $str
2727
     *
2728
     * @psalm-pure
2729
     *
2730
     * @return bool
2731
     *
2732
     * @see        UTF8::string_has_bom()
2733
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
2734
     */
2735 2
    public static function hasBom(string $str): bool
    {
        // deprecated alias: the check itself is done by "string_has_bom()"
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2739
2740
    /**
2741
     * Returns true if the string contains a lower case char, false otherwise.
2742
     *
2743
     * @param string $str <p>The input string.</p>
2744
     *
2745
     * @psalm-pure
2746
     *
2747
     * @return bool
2748
     *              <p>Whether or not the string contains a lower case character.</p>
2749
     */
2750 47
    public static function has_lowercase(string $str): bool
    {
        // any prefix followed by one lower case character
        $pattern = '.*[[:lower:]]';

        // without "mbstring" the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2759
2760
    /**
2761
     * Returns true if the string contains whitespace, false otherwise.
2762
     *
2763
     * @param string $str <p>The input string.</p>
2764
     *
2765
     * @psalm-pure
2766
     *
2767
     * @return bool
2768
     *              <p>Whether or not the string contains whitespace.</p>
2769
     */
2770 11
    public static function has_whitespace(string $str): bool
    {
        // any prefix followed by one whitespace character
        $pattern = '.*[[:space:]]';

        // without "mbstring" the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2779
2780
    /**
2781
     * Returns true if the string contains an upper case char, false otherwise.
2782
     *
2783
     * @param string $str <p>The input string.</p>
2784
     *
2785
     * @psalm-pure
2786
     *
2787
     * @return bool
2788
     *              <p>Whether or not the string contains an upper case character.</p>
2789
     */
2790 12
    public static function has_uppercase(string $str): bool
    {
        // any prefix followed by one upper case character
        $pattern = '.*[[:upper:]]';

        // without "mbstring" the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2799
2800
    /**
2801
     * Converts a hexadecimal value into a UTF-8 character.
2802
     *
2803
     * INFO: opposite to UTF8::chr_to_hex()
2804
     *
2805
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2806
     *
2807
     * @param string $hexdec <p>The hexadecimal value.</p>
2808
     *
2809
     * @psalm-pure
2810
     *
2811
     * @return false|string one single UTF-8 character
2812
     */
2813 4
    public static function hex_to_chr(string $hexdec)
    {
        // "hexdec()" skips non-hexadecimal characters; its notice about that is silenced on purpose.
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2818
2819
    /**
2820
     * Converts hexadecimal U+xxxx code point representation to integer.
2821
     *
2822
     * INFO: opposite to UTF8::int_to_hex()
2823
     *
2824
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2825
     *
2826
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2827
     *
2828
     * @psalm-pure
2829
     *
2830
     * @return false|int
2831
     *                   <p>The code point, or false on failure.</p>
2832
     */
2833 2
    public static function hex_to_int($hexdec)
    {
        // The parameter is untyped, so it is normalized to a string first.
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Optional "\u" or "U+" prefix, followed by 4 to 6 hexadecimal digits.
        // Only real hex digits are accepted: a wider class like [a-zA-Z0-9] would let
        // e.g. "zzzz" through and "intval()" would then silently return 0 (or a
        // truncated value) instead of the documented false.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2848
2849
    /**
2850
     * alias for "UTF8::html_entity_decode()"
2851
     *
2852
     * @param string   $str
2853
     * @param int|null $flags
2854
     * @param string   $encoding
2855
     *
2856
     * @psalm-pure
2857
     *
2858
     * @return string
2859
     *
2860
     * @see        UTF8::html_entity_decode()
2861
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
2862
     */
2863 2
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // deprecated alias: all arguments are passed on unchanged
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2870
2871
    /**
2872
     * Converts a UTF-8 string to a series of HTML numbered entities.
2873
     *
2874
     * INFO: opposite to UTF8::html_decode()
2875
     *
2876
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2877
     *
2878
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2879
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2880
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2881
     *
2882
     * @psalm-pure
2883
     *
2884
     * @return string HTML numbered entities
2885
     */
2886 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // With $keep_ascii_chars the converted range starts at 0x80, otherwise at 0x00.
            $convmap = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // first attempt without an explicit encoding argument
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $convmap);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        $entities = \array_map($encode_single_char, self::str_split($str));

        return \implode('', $entities);
    }
2943
2944
    /**
2945
     * UTF-8 version of html_entity_decode()
2946
     *
2947
     * The reason we are not using html_entity_decode() by itself is because
2948
     * while it is not technically correct to leave out the semicolon
2949
     * at the end of an entity most browsers will still interpret the entity
2950
     * correctly. html_entity_decode() does not convert entities without
2951
     * semicolons, so we are left with our own little solution here. Bummer.
2952
     *
2953
     * Convert all HTML entities to their applicable characters.
2954
     *
2955
     * INFO: opposite to UTF8::html_encode()
2956
     *
2957
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2958
     *
2959
     * @see http://php.net/manual/en/function.html-entity-decode.php
2960
     *
2961
     * @param string   $str      <p>
2962
     *                           The input string.
2963
     *                           </p>
2964
     * @param int|null $flags    [optional] <p>
2965
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2966
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2967
     *                           <table>
2968
     *                           Available <i>flags</i> constants
2969
     *                           <tr valign="top">
2970
     *                           <td>Constant Name</td>
2971
     *                           <td>Description</td>
2972
     *                           </tr>
2973
     *                           <tr valign="top">
2974
     *                           <td><b>ENT_COMPAT</b></td>
2975
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2976
     *                           </tr>
2977
     *                           <tr valign="top">
2978
     *                           <td><b>ENT_QUOTES</b></td>
2979
     *                           <td>Will convert both double and single quotes.</td>
2980
     *                           </tr>
2981
     *                           <tr valign="top">
2982
     *                           <td><b>ENT_NOQUOTES</b></td>
2983
     *                           <td>Will leave both double and single quotes unconverted.</td>
2984
     *                           </tr>
2985
     *                           <tr valign="top">
2986
     *                           <td><b>ENT_HTML401</b></td>
2987
     *                           <td>
2988
     *                           Handle code as HTML 4.01.
2989
     *                           </td>
2990
     *                           </tr>
2991
     *                           <tr valign="top">
2992
     *                           <td><b>ENT_XML1</b></td>
2993
     *                           <td>
2994
     *                           Handle code as XML 1.
2995
     *                           </td>
2996
     *                           </tr>
2997
     *                           <tr valign="top">
2998
     *                           <td><b>ENT_XHTML</b></td>
2999
     *                           <td>
3000
     *                           Handle code as XHTML.
3001
     *                           </td>
3002
     *                           </tr>
3003
     *                           <tr valign="top">
3004
     *                           <td><b>ENT_HTML5</b></td>
3005
     *                           <td>
3006
     *                           Handle code as HTML 5.
3007
     *                           </td>
3008
     *                           </tr>
3009
     *                           </table>
3010
     *                           </p>
3011
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3012
     *
3013
     * @psalm-pure
3014
     *
3015
     * @return string the decoded string
3016
     */
3017 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to do for inputs shorter than four bytes (examples: &; || &x;)
        // or inputs without any "&".
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // Encodings in this list never trigger the "without mbstring" warning below.
        $exempt_from_warning = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
        if (!$exempt_from_warning && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly, so that multi-encoded input (e.g. "&amp;lt;") is resolved
        // completely; stop as soon as a pass changes nothing or no "&" is left.
        while (\strpos($str, '&') !== false) {
            $before_pass = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (append the ";" to numeric entities that are missing it)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($before_pass === $str) {
                break;
            }
        }

        return $str;
    }
3077
3078
    /**
3079
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
3080
     *
3081
     * @param string $str
3082
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3083
     *
3084
     * @psalm-pure
3085
     *
3086
     * @return string
3087
     */
3088 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // both quote styles are handled, invalid sequences are substituted
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3096
3097
    /**
3098
     * Remove empty html-tag.
3099
     *
3100
     * e.g.: <pre><tag></tag></pre>
3101
     *
3102
     * @param string $str
3103
     *
3104
     * @psalm-pure
3105
     *
3106
     * @return string
3107
     */
3108 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag, optional whitespace and a closing tag, e.g. "<p> </p>".
        // The names of the opening and the closing tag are not compared.
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // "preg_replace()" returns null on failure, e.g. for invalid UTF-8 input in
        // combination with the "u" modifier. Casting that null to a string would
        // silently drop the whole input, so the unchanged input is returned instead.
        if ($stripped === null) {
            return $str;
        }

        return $stripped;
    }
3116
3117
    /**
3118
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3119
     *
3120
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3121
     *
3122
     * @see http://php.net/manual/en/function.htmlentities.php
3123
     *
3124
     * @param string $str           <p>
3125
     *                              The input string.
3126
     *                              </p>
3127
     * @param int    $flags         [optional] <p>
3128
     *                              A bitmask of one or more of the following flags, which specify how to handle
3129
     *                              quotes, invalid code unit sequences and the used document type. The default is
3130
     *                              ENT_COMPAT | ENT_HTML401.
3131
     *                              <table>
3132
     *                              Available <i>flags</i> constants
3133
     *                              <tr valign="top">
3134
     *                              <td>Constant Name</td>
3135
     *                              <td>Description</td>
3136
     *                              </tr>
3137
     *                              <tr valign="top">
3138
     *                              <td><b>ENT_COMPAT</b></td>
3139
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3140
     *                              </tr>
3141
     *                              <tr valign="top">
3142
     *                              <td><b>ENT_QUOTES</b></td>
3143
     *                              <td>Will convert both double and single quotes.</td>
3144
     *                              </tr>
3145
     *                              <tr valign="top">
3146
     *                              <td><b>ENT_NOQUOTES</b></td>
3147
     *                              <td>Will leave both double and single quotes unconverted.</td>
3148
     *                              </tr>
3149
     *                              <tr valign="top">
3150
     *                              <td><b>ENT_IGNORE</b></td>
3151
     *                              <td>
3152
     *                              Silently discard invalid code unit sequences instead of returning
3153
     *                              an empty string. Using this flag is discouraged as it
3154
     *                              may have security implications.
3155
     *                              </td>
3156
     *                              </tr>
3157
     *                              <tr valign="top">
3158
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3159
     *                              <td>
3160
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3161
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3162
     *                              string.
3163
     *                              </td>
3164
     *                              </tr>
3165
     *                              <tr valign="top">
3166
     *                              <td><b>ENT_DISALLOWED</b></td>
3167
     *                              <td>
3168
     *                              Replace invalid code points for the given document type with a
3169
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3170
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3171
     *                              instance, to ensure the well-formedness of XML documents with
3172
     *                              embedded external content.
3173
     *                              </td>
3174
     *                              </tr>
3175
     *                              <tr valign="top">
3176
     *                              <td><b>ENT_HTML401</b></td>
3177
     *                              <td>
3178
     *                              Handle code as HTML 4.01.
3179
     *                              </td>
3180
     *                              </tr>
3181
     *                              <tr valign="top">
3182
     *                              <td><b>ENT_XML1</b></td>
3183
     *                              <td>
3184
     *                              Handle code as XML 1.
3185
     *                              </td>
3186
     *                              </tr>
3187
     *                              <tr valign="top">
3188
     *                              <td><b>ENT_XHTML</b></td>
3189
     *                              <td>
3190
     *                              Handle code as XHTML.
3191
     *                              </td>
3192
     *                              </tr>
3193
     *                              <tr valign="top">
3194
     *                              <td><b>ENT_HTML5</b></td>
3195
     *                              <td>
3196
     *                              Handle code as HTML 5.
3197
     *                              </td>
3198
     *                              </tr>
3199
     *                              </table>
3200
     *                              </p>
3201
     * @param string $encoding      [optional] <p>
3202
     *                              Like <b>htmlspecialchars</b>,
3203
     *                              <b>htmlentities</b> takes an optional third argument
3204
     *                              <i>encoding</i> which defines encoding used in
3205
     *                              conversion.
3206
     *                              Although this argument is technically optional, you are highly
3207
     *                              encouraged to specify the correct value for your code.
3208
     *                              </p>
3209
     * @param bool   $double_encode [optional] <p>
3210
     *                              When <i>double_encode</i> is turned off PHP will not
3211
     *                              encode existing html entities. The default is to convert everything.
3212
     *                              </p>
3213
     *
3214
     * @psalm-pure
3215
     *
3216
     * @return string
3217
     *                <p>
3218
     *                The encoded string.
3219
     *                <br><br>
3220
     *                If the input <i>string</i> contains an invalid code unit
3221
     *                sequence within the given <i>encoding</i> an empty string
3222
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3223
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3224
     *                </p>
3225
     */
3226 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // Every encoding name other than 'UTF-8' and 'CP850' is passed
        // through normalize_encoding() (with 'UTF-8' as second argument).
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // The native htmlentities() leaves backslashes untouched, so they
        // are converted to their numeric entity in a separate step.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // Final pass through html_encode() using the resolved encoding.
        return self::html_encode($encoded, true, $encoding);
    }
3255
3256
    /**
3257
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3258
     *
3259
     * INFO: Take a look at "UTF8::htmlentities()"
3260
     *
3261
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3262
     *
3263
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3264
     *
3265
     * @param string $str           <p>
3266
     *                              The string being converted.
3267
     *                              </p>
3268
     * @param int    $flags         [optional] <p>
3269
     *                              A bitmask of one or more of the following flags, which specify how to handle
3270
     *                              quotes, invalid code unit sequences and the used document type. The default is
3271
     *                              ENT_COMPAT | ENT_HTML401.
3272
     *                              <table>
3273
     *                              Available <i>flags</i> constants
3274
     *                              <tr valign="top">
3275
     *                              <td>Constant Name</td>
3276
     *                              <td>Description</td>
3277
     *                              </tr>
3278
     *                              <tr valign="top">
3279
     *                              <td><b>ENT_COMPAT</b></td>
3280
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3281
     *                              </tr>
3282
     *                              <tr valign="top">
3283
     *                              <td><b>ENT_QUOTES</b></td>
3284
     *                              <td>Will convert both double and single quotes.</td>
3285
     *                              </tr>
3286
     *                              <tr valign="top">
3287
     *                              <td><b>ENT_NOQUOTES</b></td>
3288
     *                              <td>Will leave both double and single quotes unconverted.</td>
3289
     *                              </tr>
3290
     *                              <tr valign="top">
3291
     *                              <td><b>ENT_IGNORE</b></td>
3292
     *                              <td>
3293
     *                              Silently discard invalid code unit sequences instead of returning
3294
     *                              an empty string. Using this flag is discouraged as it
3295
     *                              may have security implications.
3296
     *                              </td>
3297
     *                              </tr>
3298
     *                              <tr valign="top">
3299
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3300
     *                              <td>
3301
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3302
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3303
     *                              string.
3304
     *                              </td>
3305
     *                              </tr>
3306
     *                              <tr valign="top">
3307
     *                              <td><b>ENT_DISALLOWED</b></td>
3308
     *                              <td>
3309
     *                              Replace invalid code points for the given document type with a
3310
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3311
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3312
     *                              instance, to ensure the well-formedness of XML documents with
3313
     *                              embedded external content.
3314
     *                              </td>
3315
     *                              </tr>
3316
     *                              <tr valign="top">
3317
     *                              <td><b>ENT_HTML401</b></td>
3318
     *                              <td>
3319
     *                              Handle code as HTML 4.01.
3320
     *                              </td>
3321
     *                              </tr>
3322
     *                              <tr valign="top">
3323
     *                              <td><b>ENT_XML1</b></td>
3324
     *                              <td>
3325
     *                              Handle code as XML 1.
3326
     *                              </td>
3327
     *                              </tr>
3328
     *                              <tr valign="top">
3329
     *                              <td><b>ENT_XHTML</b></td>
3330
     *                              <td>
3331
     *                              Handle code as XHTML.
3332
     *                              </td>
3333
     *                              </tr>
3334
     *                              <tr valign="top">
3335
     *                              <td><b>ENT_HTML5</b></td>
3336
     *                              <td>
3337
     *                              Handle code as HTML 5.
3338
     *                              </td>
3339
     *                              </tr>
3340
     *                              </table>
3341
     *                              </p>
3342
     * @param string $encoding      [optional] <p>
3343
     *                              Defines encoding used in conversion.
3344
     *                              </p>
3345
     *                              <p>
3346
     *                              For the purposes of this function, the encodings
3347
     *                              ISO-8859-1, ISO-8859-15,
3348
     *                              UTF-8, cp866,
3349
     *                              cp1251, cp1252, and
3350
     *                              KOI8-R are effectively equivalent, provided the
3351
     *                              <i>string</i> itself is valid for the encoding, as
3352
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3353
     *                              the same positions in all of these encodings.
3354
     *                              </p>
3355
     * @param bool   $double_encode [optional] <p>
3356
     *                              When <i>double_encode</i> is turned off PHP will not
3357
     *                              encode existing html entities, the default is to convert everything.
3358
     *                              </p>
3359
     *
3360
     * @psalm-pure
3361
     *
3362
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
3369
     */
3370 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // Every encoding name other than 'UTF-8' and 'CP850' is passed
        // through normalize_encoding() (with 'UTF-8' as second argument).
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Delegate the actual conversion to the native function.
        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
3387
3388
    /**
3389
     * Checks whether iconv is available on the server.
3390
     *
3391
     * @psalm-pure
3392
     *
3393
     * @return bool
3394
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3395
     *
3396
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3397
     */
3398
    public static function iconv_loaded(): bool
    {
        // Ask the engine whether the "iconv" extension is loaded.
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3402
3403
    /**
3404
     * alias for "UTF8::decimal_to_chr()"
3405
     *
3406
     * @param int|string $int
3407
     *
3408
     * @phpstan-param int|numeric-string $int
3409
     *
3410
     * @psalm-pure
3411
     *
3412
     * @return string
3413
     *
3414
     * @see        UTF8::decimal_to_chr()
3415
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
3416
     */
3417 4
    public static function int_to_chr($int): string
    {
        // Deprecated alias: forwards the argument unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3421
3422
    /**
3423
     * Converts Integer to hexadecimal U+xxxx code point representation.
3424
     *
3425
     * INFO: opposite to UTF8::hex_to_int()
3426
     *
3427
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3428
     *
3429
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3430
     * @param string $prefix [optional]
3431
     *
3432
     * @psalm-pure
3433
     *
3434
     * @return string the code point, or empty string on failure
3435
     */
3436 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros so that at least four hex digits are emitted;
        // longer values are kept as they are.
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $digits;
    }
3444
3445
    /**
3446
     * Checks whether intl-char is available on the server.
3447
     *
3448
     * @psalm-pure
3449
     *
3450
     * @return bool
3451
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3452
     *
3453
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3454
     */
3455
    public static function intlChar_loaded(): bool
    {
        // Availability is detected via the presence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3459
3460
    /**
3461
     * Checks whether intl is available on the server.
3462
     *
3463
     * @psalm-pure
3464
     *
3465
     * @return bool
3466
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3467
     *
3468
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3469
     */
3470 5
    public static function intl_loaded(): bool
    {
        // Ask the engine whether the "intl" extension is loaded.
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3474
3475
    /**
3476
     * alias for "UTF8::is_ascii()"
3477
     *
3478
     * @param string $str
3479
     *
3480
     * @psalm-pure
3481
     *
3482
     * @return bool
3483
     *
3484
     * @see        UTF8::is_ascii()
3485
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
3486
     */
3487 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: the check itself is done by ASCII::is_ascii().
        $result = ASCII::is_ascii($str);

        return $result;
    }
3491
3492
    /**
3493
     * alias for "UTF8::is_base64()"
3494
     *
3495
     * @param string $str
3496
     *
3497
     * @psalm-pure
3498
     *
3499
     * @return bool
3500
     *
3501
     * @see        UTF8::is_base64()
3502
     * @deprecated <p>please use "UTF8::is_base64()"</p>
3503
     */
3504 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards to is_base64() with its default options.
        $result = self::is_base64($str);

        return $result;
    }
3508
3509
    /**
3510
     * alias for "UTF8::is_binary()"
3511
     *
3512
     * @param int|string $str
3513
     * @param bool       $strict
3514
     *
3515
     * @psalm-pure
3516
     *
3517
     * @return bool
3518
     *
3519
     * @see        UTF8::is_binary()
3520
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3521
     */
3522 4
    public static function isBinary($str, bool $strict = false): bool
    {
        // Deprecated alias: forwards both arguments unchanged to is_binary().
        $result = self::is_binary($str, $strict);

        return $result;
    }
3526
3527
    /**
3528
     * alias for "UTF8::is_bom()"
3529
     *
3530
     * @param string $utf8_chr
3531
     *
3532
     * @psalm-pure
3533
     *
3534
     * @return bool
3535
     *
3536
     * @see        UTF8::is_bom()
3537
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3538
     */
3539 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forwards to is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
3543
3544
    /**
3545
     * alias for "UTF8::is_html()"
3546
     *
3547
     * @param string $str
3548
     *
3549
     * @psalm-pure
3550
     *
3551
     * @return bool
3552
     *
3553
     * @see        UTF8::is_html()
3554
     * @deprecated <p>please use "UTF8::is_html()"</p>
3555
     */
3556 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forwards to is_html().
        $result = self::is_html($str);

        return $result;
    }
3560
3561
    /**
3562
     * alias for "UTF8::is_json()"
3563
     *
3564
     * @param string $str
3565
     *
3566
     * @return bool
3567
     *
3568
     * @see        UTF8::is_json()
3569
     * @deprecated <p>please use "UTF8::is_json()"</p>
3570
     */
3571 1
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forwards to is_json() with its default options.
        $result = self::is_json($str);

        return $result;
    }
3575
3576
    /**
3577
     * alias for "UTF8::is_utf16()"
3578
     *
3579
     * @param string $str
3580
     *
3581
     * @psalm-pure
3582
     *
3583
     * @return false|int
3584
     *                   <strong>false</strong> if it's not UTF-16,<br>
3585
     *                   <strong>1</strong> for UTF-16LE,<br>
3586
     *                   <strong>2</strong> for UTF-16BE
3587
     *
3588
     * @see        UTF8::is_utf16()
3589
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
3590
     */
3591 2
    public static function isUtf16($str)
    {
        // Deprecated alias: whatever is_utf16() yields is handed back as-is.
        $result = self::is_utf16($str);

        return $result;
    }
3595
3596
    /**
3597
     * alias for "UTF8::is_utf32()"
3598
     *
3599
     * @param string $str
3600
     *
3601
     * @psalm-pure
3602
     *
3603
     * @return false|int
3604
     *                   <strong>false</strong> if it's not UTF-32,
3605
     *                   <strong>1</strong> for UTF-32LE,
3606
     *                   <strong>2</strong> for UTF-32BE
3607
     *
3608
     * @see        UTF8::is_utf32()
3609
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
3610
     */
3611 2
    public static function isUtf32($str)
    {
        // Deprecated alias: whatever is_utf32() yields is handed back as-is.
        $result = self::is_utf32($str);

        return $result;
    }
3615
3616
    /**
3617
     * alias for "UTF8::is_utf8()"
3618
     *
3619
     * @param string $str
3620
     * @param bool   $strict
3621
     *
3622
     * @psalm-pure
3623
     *
3624
     * @return bool
3625
     *
3626
     * @see        UTF8::is_utf8()
3627
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3628
     */
3629 17
    public static function isUtf8($str, bool $strict = false): bool
    {
        // Deprecated alias: forwards both arguments unchanged to is_utf8().
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3633
3634
    /**
3635
     * Returns true if the string contains only alphabetic chars, false otherwise.
3636
     *
3637
     * @param string $str <p>The input string.</p>
3638
     *
3639
     * @psalm-pure
3640
     *
3641
     * @return bool
3642
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3643
     */
3644 10
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // When the mbstring support flag is not set, use the class's own matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3653
3654
    /**
3655
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3656
     *
3657
     * @param string $str <p>The input string.</p>
3658
     *
3659
     * @psalm-pure
3660
     *
3661
     * @return bool
3662
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3663
     */
3664 13
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // When the mbstring support flag is not set, use the class's own matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3673
3674
    /**
3675
     * Returns true if the string contains only punctuation chars, false otherwise.
3676
     *
3677
     * @param string $str <p>The input string.</p>
3678
     *
3679
     * @psalm-pure
3680
     *
3681
     * @return bool
3682
     *              <p>Whether or not $str contains only punctuation chars.</p>
3683
     */
3684 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation characters, anchored at both ends.
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3688
3689
    /**
3690
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3691
     *
3692
     * @param string $str                       <p>The input string.</p>
3693
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3694
     *
3695
     * @psalm-pure
3696
     *
3697
     * @return bool
3698
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3699
     */
3700 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string counts as printable when remove_invisible_characters()
        // hands it back unchanged.
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }
3704
3705
    /**
3706
     * Checks if a string is 7 bit ASCII.
3707
     *
3708
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3709
     *
3710
     * @param string $str <p>The string to check.</p>
3711
     *
3712
     * @psalm-pure
3713
     *
3714
     * @return bool
3715
     *              <p>
3716
     *              <strong>true</strong> if it is ASCII<br>
3717
     *              <strong>false</strong> otherwise
3718
     *              </p>
3719
     */
3720 8
    public static function is_ascii(string $str): bool
    {
        // The check itself is implemented by the ASCII helper class.
        $result = ASCII::is_ascii($str);

        return $result;
    }
3724
3725
    /**
3726
     * Returns true if the string is base64 encoded, false otherwise.
3727
     *
3728
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3729
     *
3730
     * @param string|null $str                   <p>The input string.</p>
3731
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3732
     *
3733
     * @psalm-pure
3734
     *
3735
     * @return bool
3736
     *              <p>Whether or not $str is base64 encoded.</p>
3737
     */
3738 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // An empty string only counts as base64 when explicitly allowed.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Anything that is not a string (e.g. null) is rejected.
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round-trip check: re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3756
3757
    /**
3758
     * Check if the input is binary... (this looks like a hack).
3759
     *
3760
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3761
     *
3762
     * @param int|string $input
3763
     * @param bool       $strict
3764
     *
3765
     * @psalm-pure
3766
     *
3767
     * @return bool
3768
     */
3769 40
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string made up solely of "0" and "1" characters counts as binary.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        $file_type = self::get_file_type($data);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than 25% NUL bytes => binary.
        $byte_count = \strlen($data);
        $null_ratio = \substr_count($data, "\x0", 0, $byte_count) / $byte_count;
        if ($null_ratio > 0.25) {
            return true;
        }

        // The fileinfo based detection below only runs in strict mode.
        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $detected_encoding === 'binary';
    }
3808
3809
    /**
3810
     * Check if the file is binary.
3811
     *
3812
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3813
     *
3814
     * @param string $file
3815
     *
3816
     * @return bool
3817
     */
3818 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened, so there is nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are examined.
        $chunk = \fread($handle, 512);
        \fclose($handle);

        if ($chunk === false || $chunk === '') {
            return false;
        }

        // Delegate to is_binary() in strict mode.
        return self::is_binary($chunk, true);
    }
3835
3836
    /**
3837
     * Returns true if the string contains only whitespace chars, false otherwise.
3838
     *
3839
     * @param string $str <p>The input string.</p>
3840
     *
3841
     * @psalm-pure
3842
     *
3843
     * @return bool
3844
     *              <p>Whether or not $str contains only whitespace characters.</p>
3845
     */
3846 15
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // When the mbstring support flag is not set, use the class's own matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3855
3856
    /**
3857
     * Checks if the given string is equal to any "Byte Order Mark".
3858
     *
3859
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3860
     *
3861
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3862
     *
3863
     * @param string $str <p>The input string.</p>
3864
     *
3865
     * @psalm-pure
3866
     *
3867
     * @return bool
3868
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3869
     */
3870 2
    public static function is_bom($str): bool
    {
        // The known BOM sequences are the KEYS of self::$BOM (the values are
        // their byte lengths and are irrelevant here).
        //
        // The strict comparison keeps the previous semantics: only a value
        // that is identical (===) to one of the keys is accepted. Unlike the
        // former by-reference foreach, this never touches the shared static
        // array, so its elements are not turned into references.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3881
3882
    /**
3883
     * Determine whether the string is considered to be empty.
3884
     *
3885
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3886
     * empty() does not generate a warning if the variable does not exist.
3887
     *
3888
     * @param array|float|int|string $str
3889
     *
3890
     * @psalm-pure
3891
     *
3892
     * @return bool
3893
     *              <p>Whether or not $str is empty().</p>
3894
     */
3895 1
    public static function is_empty($str): bool
    {
        // For a parameter (which always exists) empty() is the same as
        // testing whether the value is falsy.
        return !$str;
    }
3899
3900
    /**
3901
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3902
     *
3903
     * @param string $str <p>The input string.</p>
3904
     *
3905
     * @psalm-pure
3906
     *
3907
     * @return bool
3908
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3909
     */
3910 13
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // When the mbstring support flag is not set, use the class's own matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3919
3920
    /**
3921
     * Check if the string contains any HTML tags.
3922
     *
3923
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3924
     *
3925
     * @param string $str <p>The input string.</p>
3926
     *
3927
     * @psalm-pure
3928
     *
3929
     * @return bool
3930
     *              <p>Whether or not $str contains html elements.</p>
3931
     */
3932 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Looks for one opening, closing or self-closing tag, optionally with attributes.
        $found = \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded);

        return $found === 1;
    }
3947
3948
    /**
3949
     * Check if $url is a correct URL.
3950
     *
3951
     * @param string $url
3952
     * @param bool   $disallow_localhost
3953
     *
3954
     * @psalm-pure
3955
     *
3956
     * @return bool
3957
     */
3958 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // Inside a URL an IPv6 address is written in square
                    // brackets, so the loopback address has to be rejected
                    // in that form as well; otherwise "http://[::1]/" would
                    // reach the filter_var() fallback below.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // Reject any URL that contains ".localhost" (e.g. "foo.localhost").
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything the pattern above does not recognize is decided by the native validator.
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
4003
4004
    /**
4005
     * Try to check if "$str" is a JSON-string.
4006
     *
4007
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
4008
     *
4009
     * @param string $str                                    <p>The input string.</p>
4010
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
4011
     *                                                       results.</p>
4012
     *
4013
     * @return bool
4014
     *              <p>Whether or not the $str is in JSON format.</p>
4015
     */
4016 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only accepted when the input itself is the
        // literal "null" (compared case-insensitively).
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject everything that is neither an object nor an array.
        $is_container = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid && !$is_container) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4044
4045
    /**
     * Returns true if the string contains only lowercase chars, false otherwise.
     *
     * The check is done with the POSIX class "[[:lower:]]"; the pattern allows zero chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // fallback when the support table does not report mbstring
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4062
4063
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * INFO: The string is probed with "unserialize()", but object instantiation is disabled
     *       ("allowed_classes" => false), so no class code (e.g. "__wakeup" / "__destruct") of the
     *       payload is executed while checking.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of false, which can't be told apart
        // from the failure return value of "unserialize()"
        if ($str === 'b:0;') {
            return true;
        }

        // "@": unserialize() reports invalid input via a notice / warning, we only need the result
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4085
4086
    /**
     * Returns true if the string contains only upper case chars, false otherwise.
     *
     * The check is done with the POSIX class "[[:upper:]]"; the pattern allows zero chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // fallback when the support table does not report mbstring
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4106
4107
    /**
     * Check if the string is UTF-16.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * INFO: For each byte order the input is converted to UTF-8 and round-tripped; if the round-trip
     *       is stable, a score is collected for that byte order. The byte order with the higher
     *       score wins, equal scores mean "not UTF-16".
     *
     * @param string $str                       <p>The input string (it is cast to string).</p>
     * @param bool   $check_if_string_is_binary [optional] <p>If true, return false right away when
     *                                          "UTF8::is_binary()" does not report the input as binary.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // char statistics of the input, computed lazily and shared by both byte-order probes
        $str_chars = [];

        $scores = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $byte_order) {
            $scores[$byte_order] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            // only a stable round-trip (UTF-8 -> UTF-16 -> UTF-8) counts as a candidate
            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys of the char statistics are needed, so iterate them by value
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$byte_order];
                }
            }
        }

        $maybe_utf16le = $scores['UTF-16LE'];
        $maybe_utf16be = $scores['UTF-16BE'];

        if ($maybe_utf16be === $maybe_utf16le) {
            return false;
        }

        return $maybe_utf16le > $maybe_utf16be ? 1 : 2;
    }
4200
4201
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * INFO: For each byte order the input is converted to UTF-8 and round-tripped; if the round-trip
     *       is stable, a score is collected for that byte order. The byte order with the higher
     *       score wins, equal scores mean "not UTF-32".
     *
     * @param string $str                       <p>The input string (it is cast to string).</p>
     * @param bool   $check_if_string_is_binary [optional] <p>If true, return false right away when
     *                                          "UTF8::is_binary()" does not report the input as binary.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // char statistics of the input, computed lazily and shared by both byte-order probes
        $str_chars = [];

        $scores = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $byte_order) {
            $scores[$byte_order] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            // only a stable round-trip (UTF-8 -> UTF-32 -> UTF-8) counts as a candidate
            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys of the char statistics are needed, so iterate them by value
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$byte_order];
                }
            }
        }

        $maybe_utf32le = $scores['UTF-32LE'];
        $maybe_utf32be = $scores['UTF-32BE'];

        if ($maybe_utf32be === $maybe_utf32le) {
            return false;
        }

        return $maybe_utf32le > $maybe_utf32be ? 1 : 2;
    }
4294
4295
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * INFO: An array is checked element by element (recursively); the first failing element
     *       makes the whole check fail. Non-array input is cast to string.
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // read-only walk: iterate by value, no reference needed
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4325
4326
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string, after passing it through "UTF8::filter()".
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, handed to "json_decode()" as-is.</p>
     *
     * @throws \RuntimeException if the "json" entry of the support table is false
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The result of the native "json_decode()" call for the filtered input;
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4380
4381
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value, after passing it through "UTF8::filter()".
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource.</p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>), handed to
     *                       "json_encode()" as-is.
     *                       </p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the "json" entry of the support table is false
     *
     * @psalm-pure
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4434
4435
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is done via the existence of the "json_decode" function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4449
4450
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // any other encoding: normalize its name and use the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $first = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $first . $rest;
        }

        $rest = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // without language / length special cases the native mbstring call is used directly
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $rest;
        }

        $first = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $first . $rest;
    }
4512
4513
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4544
4545
    /**
     * Lowercase the first char of all words in the string.
     *
     * INFO: The string is split via "UTF8::str_to_words()", every non-empty chunk that is not
     *       listed in $exceptions is passed to "UTF8::lcfirst()" and the chunks are joined again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // strict check: the string "0" is falsy in PHP, but it is valid input
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty chunks, but keep the literal "0"
            if (!$word && $word !== '0') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // excluded words are kept untouched
                $words_str .= $word;
            } else {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }
        }

        return $words_str;
    }
4598
4599
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4630
4631
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; <strong>null</strong> strips
     *                           whitespace ("\s"), an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // an empty char list would build the invalid character class "[]", there is nothing to strip
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // "mb_ereg_replace()" takes the pattern without delimiters,
            // the fallback gets "/" quoted as well
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = "^[{$quoted_chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4671
4672
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings
     *                                  (an array is joined into one string first).</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // no code points => nothing to compare
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4698
4699
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars, 0 if "UTF8::chr_size_list()" returns an empty list.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $byte_lengths = self::chr_size_list($str);

        return $byte_lengths === []
            ? 0
            : (int) \max($byte_lengths);
    }
4721
4722
    /**
     * Checks whether mbstring is available on the server.
     *
     * The check is done via "extension_loaded('mbstring')".
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4736
4737
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings
     *                             (an array is joined into one string first).</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // no code points => nothing to compare
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4764
4765
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4782
4783
    /**
4784
     * Normalize the encoding-"name" input.
4785
     *
4786
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4787
     *
4788
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4789
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4790
     *
4791
     * @psalm-pure
4792
     *
4793
     * @return mixed|string
4794
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
4795
     *
4796
     * @template TNormalizeEncodingFallback
4797
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
4798
     * @phpstan-return string|TNormalizeEncodingFallback
4799
     */
4800 339
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * Per-request memo of already normalized names (input name => normalized name).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // Work on the string form of whatever was passed in.
        $encoding = (string) $encoding;

        // Both '' and '0' are falsy strings: nothing usable was given.
        if (!$encoding) {
            return $fallback;
        }

        // Fast path: exact (case-sensitive) matches for the most common names.
        $well_known = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($well_known[$encoding])) {
            return $well_known[$encoding];
        }

        // only a fallback, for non "strict_types" usage (e.g. a bool that was cast to string)
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // Lazy-load the list of known encoding names.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // The name is already spelled exactly like a known encoding.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // Aliases, keyed by the upper-cased name with everything but [a-zA-Z0-9] removed.
        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Remember the caller's spelling: it is the cache key for the result.
        $encoding_original = $encoding;

        $encoding = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        // Known alias => canonical name; otherwise the upper-cased input is returned as-is.
        if (isset($equivalences[$alias_key])) {
            $encoding = $equivalences[$alias_key];
        }

        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }
4948
4949
    /**
4950
     * Standardize line ending to unix-like.
4951
     *
4952
     * @param string          $str      <p>The input string.</p>
4953
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
4954
     *                                  here.</p>
4955
     *
4956
     * @psalm-pure
4957
     *
4958
     * @return string
4959
     *                <p>A string with normalized line ending.</p>
4960
     */
4961 5
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // Array replacers keep the positional str_replace() pairing:
            // [0] replaces "\r\n", [1] replaces "\r" and [2] replaces "\n".
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        // strtr() with a map scans the input once, prefers the longest key ("\r\n"
        // before "\r" / "\n") and never re-scans text it has just inserted.
        // str_replace() applies its patterns one after another instead, so a
        // replacer such as "\r\n" would itself be replaced again by the later
        // "\r" and "\n" patterns ("a\r\nb" became "a\r\r\n\r\nb").
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4965
4966
    /**
4967
     * Normalize some MS Word special characters.
4968
     *
4969
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
4970
     *
4971
     * @param string $str <p>The string to be normalized.</p>
4972
     *
4973
     * @psalm-pure
4974
     *
4975
     * @return string
4976
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
4977
     */
4978 10
    public static function normalize_msword(string $str): string
    {
        // Thin pass-through: the actual work is done by ASCII::normalize_msword().
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4982
4983
    /**
4984
     * Normalize the whitespace.
4985
     *
4986
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
4987
     *
4988
     * @param string $str                        <p>The string to be normalized.</p>
4989
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4990
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
4991
     *                                           bidirectional text chars.</p>
4992
     *
4993
     * @psalm-pure
4994
     *
4995
     * @return string
4996
     *                <p>A string with normalized whitespace.</p>
4997
     */
4998 61
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        // Thin pass-through: all three arguments are forwarded unchanged to the ASCII helper.
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
5009
5010
    /**
5011
     * Calculates Unicode code point of the given UTF-8 encoded character.
5012
     *
5013
     * INFO: opposite to UTF8::chr()
5014
     *
5015
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
5016
     *
5017
     * @param string $chr      <p>The character of which to calculate code point.</p>
5018
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5019
     *
5020
     * @psalm-pure
5021
     *
5022
     * @return int
5023
     *             <p>Unicode code point of the given character,<br>
5024
     *             0 on invalid UTF-8 byte sequence</p>
5025
     */
5026 27
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * Memo of already resolved characters, keyed by "<char>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // 'UTF-8' and 'CP850' are accepted as-is; any other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // The name may still be something other than UTF-8 after normalization.
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // Lazy-load the character => code point table.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $code = \IntlChar::ord($chr);
            if ($code) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php: decode up to four bytes by hand
        //

        // unpack('C*') yields a 1-based list of unsigned byte values.
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code_point = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code_point = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code_point = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode, in which case this is 0)
            $code_point = $lead_byte;
        }

        return $CHAR_CACHE[$cache_key] = $code_point;
    }
5104
5105
    /**
5106
     * Parses the string into an array (into the second parameter).
5107
     *
5108
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
5109
     *          if the second parameter is not set!
5110
     *
5111
     * EXAMPLE: <code>
5112
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
5113
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
5114
     * </code>
5115
     *
5116
     * @see http://php.net/manual/en/function.parse-str.php
5117
     *
5118
     * @param string $str        <p>The input string.</p>
5119
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
5120
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5121
     *
5122
     * @psalm-pure
5123
     *
5124
     * @return bool
5125
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
5126
     */
5127 2
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            // Without mbstring the only failure signal is an empty result.
            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        // Success requires both: mb_parse_str() did not fail and something was parsed.
        return $parsed !== false && $result !== [];
    }
5146
5147
    /**
5148
     * Checks if \u modifier is available that enables Unicode support in PCRE.
5149
     *
5150
     * @psalm-pure
5151
     *
5152
     * @return bool
5153
     *              <p>
5154
     *              <strong>true</strong> if support is available,<br>
5155
     *              <strong>false</strong> otherwise
5156
     *              </p>
5157
     */
5158
    public static function pcre_utf8_support(): bool
    {
        // Probe with an empty pattern that carries the "u" modifier; warnings are
        // silenced on purpose and the outcome is reported via the return value.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return (bool) $probe;
    }
5163
5164
    /**
5165
     * Create an array containing a range of UTF-8 characters.
5166
     *
5167
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
5168
     *
5169
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
5170
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
5171
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
5172
     *                              "is_numeric"</p>
5173
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5174
     * @param float|int  $step      [optional] <p>
5175
     *                              If a step value is given, it will be used as the
5176
     *                              increment between elements in the sequence. step
5177
     *                              should be given as a positive number. If not specified,
5178
     *                              step will default to 1.
5179
     *                              </p>
5180
     *
5181
     * @psalm-pure
5182
     *
5183
     * @return string[]
5184
     */
5185 2
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // Falsy boundaries ('', '0', 0, null, ...) cannot describe a range.
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // Resolve the start of the range.
        // ctype_*() is only ever given strings: an int between -128 and 255 would be
        // interpreted as the ASCII value of a single character, and non-string
        // arguments are deprecated since PHP 8.1.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // not a number: treat it as a character and use its code point
            $start = self::ord((string) $var1);
        }

        if (!$start) {
            return [];
        }

        // Resolve the end of the range the same way the start was interpreted.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord((string) $var2);
        }

        if (!$end) {
            return [];
        }

        // Turn every code point of the numeric range back into a character.
        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5258
5259
    /**
5260
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
5261
     *
5262
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
5263
     *
5264
     * e.g:
5265
     * 'test+test'                     => 'test+test'
5266
     * 'D&#252;sseldorf'               => 'Düsseldorf'
5267
     * 'D%FCsseldorf'                  => 'Düsseldorf'
5268
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
5269
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
5270
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
5271
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
5272
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
5273
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
5274
     *
5275
     * @param string $str          <p>The input string.</p>
5276
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
5277
     *
5278
     * @psalm-pure
5279
     *
5280
     * @return string
5281
     *                <p>The decoded URL, as a string.</p>
5282
     */
5283 7
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without any of these markers there is nothing to decode.
        $has_marker = \strpos($str, '&') !== false
            || \strpos($str, '%') !== false
            || \strpos($str, '+') !== false
            || \strpos($str, '\u') !== false;
        if (!$has_marker) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass always runs; with $multi_decode the passes repeat
        // until a pass no longer changes the string.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entities_decoded));
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
5335
5336
    /**
5337
     * Replaces all occurrences of $pattern in $str by $replacement.
5338
     *
5339
     * @param string $str         <p>The input string.</p>
5340
     * @param string $pattern     <p>The regular expression pattern.</p>
5341
     * @param string $replacement <p>The string to replace with.</p>
5342
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
5343
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
5344
     *
5345
     * @psalm-pure
5346
     *
5347
     * @return string
5348
     */
5349 18
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string "msr" is reduced to "ms" before it is used as modifiers.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: a falsy delimiter ('' or '0') is replaced by '/'
        $delim = $delimiter ?: '/';

        // The "u" (UTF-8) modifier is always added in front of the caller's modifiers.
        $regex = $delim . $pattern . $delim . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5371
5372
    /**
5373
     * alias for "UTF8::remove_bom()"
5374
     *
5375
     * @param string $str
5376
     *
5377
     * @psalm-pure
5378
     *
5379
     * @return string
5380
     *
5381
     * @see        UTF8::remove_bom()
5382
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
5383
     */
5384 1
    public static function removeBOM(string $str): string
    {
        // Deprecated camelCase alias: forwards to remove_bom().
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5388
5389
    /**
5390
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
5391
     *
5392
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
5393
     *
5394
     * @param string $str <p>The input string.</p>
5395
     *
5396
     * @psalm-pure
5397
     *
5398
     * @return string
5399
     *                <p>A string without UTF-BOM.</p>
5400
     */
5401 55
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Every known BOM is checked in turn against the (possibly already shortened) string.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) !== 0) {
                continue;
            }

            // Drop the leading BOM bytes and keep everything after them.
            /** @var false|string $remainder - needed for PhpStan (stubs error) */
            $remainder = \substr($str, $bom_byte_length);
            if ($remainder === false) {
                return '';
            }

            $str = (string) $remainder;
        }

        return $str;
    }
5424
5425
    /**
5426
     * Removes duplicate occurrences of a string in another string.
5427
     *
5428
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
5429
     *
5430
     * @param string          $str  <p>The base string.</p>
5431
     * @param string|string[] $what <p>String to search for in the base string.</p>
5432
     *
5433
     * @psalm-pure
5434
     *
5435
     * @return string
5436
     *                <p>A string with removed duplicates.</p>
5437
     */
5438 2
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        // A single needle is handled like a one-element list.
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                // Collapse every run of consecutive repetitions into one occurrence.
                // The replacement is the back-reference '$1' (the captured literal)
                // and not $item itself: preg_replace() interprets "$0", "\0", "${1}"
                // etc. inside the replacement, so a needle such as '$0' would
                // otherwise re-insert the whole run and nothing would be removed.
                $str = (string) \preg_replace(
                    '/(' . \preg_quote($item, '/') . ')+/u',
                    '$1',
                    $str
                );
            }
        }

        return $str;
    }
5455
5456
    /**
5457
     * Remove html via "strip_tags()" from the string.
5458
     *
5459
     * @param string $str            <p>The input string.</p>
5460
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
5461
     *                               should not be stripped. Default: null
5462
     *                               </p>
5463
     *
5464
     * @psalm-pure
5465
     *
5466
     * @return string
5467
     *                <p>A string without HTML tags.</p>
5468
     */
5469 6
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        // Tags listed in $allowable_tags are kept, all others are stripped by strip_tags().
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5473
5474
    /**
5475
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
5476
     *
5477
     * @param string $str         <p>The input string.</p>
5478
     * @param string $replacement [optional] <p>Default is an empty string.</p>
5479
     *
5480
     * @psalm-pure
5481
     *
5482
     * @return string
5483
     *                <p>A string without breaks.</p>
5484
     */
5485 6
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Alternatives, tried in this order: CRLF (as one unit), lone CR, lone LF,
        // any <br ...> tag. Modifiers: i = case-insensitive, s = "." also matches
        // newlines, U = ungreedy (so "<br.*>" stops at the first ">").
        //
        // The pattern used to start with a stray "/" ("#/\r\n|..."), which made the
        // CRLF alternative require a literal slash: a plain "\r\n" was then replaced
        // twice (once for "\r", once for "\n") and a "/" in front of it was swallowed.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
5489
5490
    /**
5491
     * Remove invisible characters from a string.
5492
     *
5493
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
5494
     *
5495
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
5496
     *
5497
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
5498
     *
5499
     * @param string $str                     <p>The input string.</p>
5500
     * @param bool   $url_encoded             [optional] <p>
5501
     *                                        Try to remove url encoded control character.
5502
     *                                        WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
5503
     *                                        <br>
5504
     *                                        Default: false
5505
     *                                        </p>
5506
     * @param string $replacement             [optional] <p>The replacement character.</p>
5507
     * @param bool   $keep_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
5508
     *
5509
     * @psalm-pure
5510
     *
5511
     * @return string
5512
     *                <p>A string without invisible chars.</p>
5513
     */
5514 92
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_control_characters = true
    ): string {
        // Thin pass-through: all four arguments are forwarded unchanged to the ASCII helper.
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement, $keep_control_characters);

        return $cleaned;
    }
5527
5528
    /**
5529
     * Returns a new string with the prefix $substring removed, if present.
5530
     *
5531
     * @param string $str       <p>The input string.</p>
5532
     * @param string $substring <p>The prefix to remove.</p>
5533
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5534
     *
5535
     * @psalm-pure
5536
     *
5537
     * @return string
5538
     *                <p>A string without the prefix $substring.</p>
5539
     */
5540 12
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to do for an empty prefix or when $str does not start with it.
        // The prefix is compared against '' explicitly: a plain truthiness test
        // treats the string "0" as empty, so a leading "0" could never be removed.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // Skip as many characters as the prefix is long.
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5569
5570
    /**
5571
     * Returns a new string with the suffix $substring removed, if present.
5572
     *
5573
     * @param string $str
5574
     * @param string $substring <p>The suffix to remove.</p>
5575
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5576
     *
5577
     * @psalm-pure
5578
     *
5579
     * @return string
5580
     *                <p>A string having a $str without the suffix $substring.</p>
5581
     */
5582 12
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to do for an empty suffix or when $str does not end with it
        // (byte-wise comparison of the last strlen($substring) bytes).
        // The suffix is compared against '' explicitly: a plain truthiness test
        // treats the string "0" as empty, so a trailing "0" could never be removed.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // Keep everything except the last mb_strlen($substring) characters.
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5608
5609
    /**
5610
     * Replaces all occurrences of $search in $str by $replacement.
5611
     *
5612
     * @param string $str            <p>The input string.</p>
5613
     * @param string $search         <p>The needle to search for.</p>
5614
     * @param string $replacement    <p>The string to replace with.</p>
5615
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5616
     *
5617
     * @psalm-pure
5618
     *
5619
     * @return string
5620
     *                <p>A string with replaced parts.</p>
5621
     */
5622 29
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // The case-insensitive variant is delegated to self::str_ireplace();
        // everything else goes straight to the native str_replace().
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5634
5635
    /**
5636
     * Replaces all occurrences of $search in $str by $replacement.
5637
     *
5638
     * @param string       $str            <p>The input string.</p>
5639
     * @param array        $search         <p>The elements to search for.</p>
5640
     * @param array|string $replacement    <p>The string to replace with.</p>
5641
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5642
     *
5643
     * @psalm-pure
5644
     *
5645
     * @return string
5646
     *                <p>A string with replaced parts.</p>
5647
     */
5648 30
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // $replacement may be a single string or an array; it is handed on
        // unchanged to whichever replace function does the work.
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5660
5661
    /**
5662
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
5663
     *
5664
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
5665
     *
5666
     * @param string $str                        <p>The input string</p>
5667
     * @param string $replacement_char           <p>The replacement character.</p>
5668
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
5669
     *
5670
     * @psalm-pure
5671
     *
5672
     * @return string
5673
     *                <p>A string without diamond question marks (�).</p>
5674
     */
5675 35
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // Invalid byte sequences are either dropped ("none") or first turned
            // into U+FFFD; the str_replace() at the end then swaps every U+FFFD
            // for the requested replacement. Going through U+FFFD instead of
            // \ord($replacement_char) matters because \ord() only looks at the
            // first byte, which breaks multi-byte and multi-character replacements.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character($substitute);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is global state: restore it even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5720
5721
    /**
5722
     * Strip whitespace or other characters from the end of a UTF-8 string.
5723
     *
5724
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
5725
     *
5726
     * @param string      $str   <p>The string to be trimmed.</p>
5727
     * @param string|null $chars <p>Optional characters to be stripped.</p>
5728
     *
5729
     * @psalm-pure
5730
     *
5731
     * @return string
5732
     *                <p>A string with unwanted characters stripped from the right.</p>
5733
     */
5734 21
    public static function rtrim(string $str = '', string $chars = null): string
    {
        // An empty character list means "strip nothing" (like the native rtrim()).
        // Without this guard the pattern below would become "[]+$", which is not
        // a valid regex, and the failed replace result would be cast to ''.
        if ($str === '' || $chars === '') {
            return $str;
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // The mb_ereg pattern is used without delimiters; for the fallback
            // the characters are additionally quoted for a "/" delimiter.
            $chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$chars}" instead of "${chars}": the latter is deprecated since PHP 8.2
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5762
5763
    /**
5764
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
5765
     *
5766
     * @param bool $useEcho
5767
     *
5768
     * @psalm-pure
5769
     *
5770
     * @return string|void
5771
     */
5772 2
    public static function showSupport(bool $useEcho = true)
    {
        // Build the complete report first, so that it can be echoed and returned.
        $html = '<pre>';

        // Iterate by value: the entries are only read, and a by-reference loop
        // over the shared static array would leave a dangling reference behind.
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        // the report is returned in both cases, echoed or not
        return $html;
    }
5790
5791
    /**
5792
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
5793
     *
5794
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
5795
     *
5796
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
5797
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
5798
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
5799
     *
5800
     * @psalm-pure
5801
     *
5802
     * @return string
5803
     *                <p>The HTML numbered entity for the given character.</p>
5804
     */
5805 2
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to encode
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched, but only if the caller asked for it
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5824
5825
    /**
5826
     * @param string $str
5827
     * @param int    $tab_length
5828
     *
5829
     * @psalm-pure
5830
     *
5831
     * @return string
5832
     */
5833 5
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // A tab stop needs at least one space. Returning early also keeps a
        // negative length away from str_repeat(), which throws a ValueError on PHP 8.
        if ($tab_length < 1) {
            return $str;
        }

        // every run of exactly $tab_length spaces becomes one tab character
        return \str_replace(\str_repeat(' ', $tab_length), "\t", $str);
    }
5845
5846
    /**
5847
     * alias for "UTF8::str_split()"
5848
     *
5849
     * @param int|string $str
5850
     * @param int        $length
5851
     * @param bool       $clean_utf8
5852
     *
5853
     * @psalm-pure
5854
     *
5855
     * @return string[]
5856
     *
5857
     * @see        UTF8::str_split()
5858
     * @deprecated <p>please use "UTF8::str_split()"</p>
5859
     */
5860 9
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        // deprecated alias: all arguments are forwarded unchanged to str_split()
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5868
5869
    /**
5870
     * alias for "UTF8::str_starts_with()"
5871
     *
5872
     * @param string $haystack
5873
     * @param string $needle
5874
     *
5875
     * @psalm-pure
5876
     *
5877
     * @return bool
5878
     *
5879
     * @see        UTF8::str_starts_with()
5880
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
5881
     */
5882 1
    public static function str_begins(string $haystack, string $needle): bool
    {
        // deprecated alias: the check itself is done by str_starts_with()
        $begins_with_needle = self::str_starts_with($haystack, $needle);

        return $begins_with_needle;
    }
5886
5887
    /**
5888
     * Returns a camelCase version of the string. Trims surrounding spaces,
5889
     * capitalizes letters following digits, spaces, dashes and underscores,
5890
     * and removes spaces, dashes, as well as underscores.
5891
     *
5892
     * @param string      $str                           <p>The input string.</p>
5893
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
5894
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5895
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5896
     *                                                   tr</p>
5897
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
5898
     *                                                   -> ß</p>
5899
     *
5900
     * @psalm-pure
5901
     *
5902
     * @return string
5903
     */
5904 32
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // 'UTF-8' and 'CP850' are used as given, every other name goes through normalize_encoding()
        $encoding_is_used_as_given = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$encoding_is_used_as_given) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $trimmed = \trim($str);
        $str = self::lcfirst($trimmed, $encoding, false, $lang, $try_to_keep_the_string_length);

        // drop dashes / underscores at the very beginning
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_*" functions are only used when no language or length handling was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Removes a run of separators and upper-cases the character after it (if any).
         *
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        $upper_after_separator = static function (array $match) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!isset($match[1])) {
                // separators at the end of the string: nothing follows, so they are just removed
                return '';
            }

            if (!$use_mb_functions) {
                return self::strtoupper($match[1], $encoding, false, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($match[1]);
            }

            return \mb_strtoupper($match[1], $encoding);
        };

        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            $upper_after_separator,
            $str
        );

        /**
         * Upper-cases a run of numbers together with the character after it (if any).
         *
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        $upper_after_number = static function (array $match) use ($use_mb_functions, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($match[0], $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($match[0]);
            }

            return \mb_strtoupper($match[0], $encoding);
        };

        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            $upper_after_number,
            $str
        );
    }
5980
5981
    /**
5982
     * Returns the string with the first letter of each word capitalized,
5983
     * except for when the word is a name which shouldn't be capitalized.
5984
     *
5985
     * @param string $str
5986
     *
5987
     * @psalm-pure
5988
     *
5989
     * @return string
5990
     *                <p>A string with $str capitalized.</p>
5991
     */
5992 1
    public static function str_capitalize_name(string $str): string
    {
        // 1.) collapse the whitespace, 2.) run the helper with ' ', 3.) run it again with '-'
        $collapsed = self::collapse_whitespace($str);
        $after_space_pass = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($after_space_pass, '-');
    }
6002
6003
    /**
6004
     * Returns true if the string contains $needle, false otherwise. By default
6005
     * the comparison is case-sensitive, but can be made insensitive by setting
6006
     * $case_sensitive to false.
6007
     *
6008
     * @param string $haystack       <p>The input string.</p>
6009
     * @param string $needle         <p>Substring to look for.</p>
6010
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6011
     *
6012
     * @psalm-pure
6013
     *
6014
     * @return bool
6015
     *              <p>Whether or not $haystack contains $needle.</p>
6016
     */
6017 21
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // case-insensitive lookups always go through "mb_stripos()"
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // before PHP 8 there is no native "str_contains()"
        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
6033
6034
    /**
6035
     * Returns true if the string contains all $needles, false otherwise. By
6036
     * default the comparison is case-sensitive, but can be made insensitive by
6037
     * setting $case_sensitive to false.
6038
     *
6039
     * @param string $haystack       <p>The input string.</p>
6040
     * @param array  $needles        <p>SubStrings to look for.</p>
6041
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6042
     *
6043
     * @psalm-pure
6044
     *
6045
     * @return bool
6046
     *              <p>Whether or not $haystack contains all of the $needles.</p>
6047
     */
6048 45
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // by-value iteration: the needles are only read
        foreach ($needles as $needle) {
            // Empty and non-scalar needles can never be "contained". The check is
            // done on the string value, so that the needle "0" is searched for
            // instead of being rejected by a truthiness test.
            if (!\is_scalar($needle) || (string) $needle === '') {
                return false;
            }

            $needle = (string) $needle;

            // exactly one search per needle, matching the requested case handling
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6073
6074
    /**
6075
     * Returns true if the string contains any $needles, false otherwise. By
6076
     * default the comparison is case-sensitive, but can be made insensitive by
6077
     * setting $case_sensitive to false.
6078
     *
6079
     * @param string $haystack       <p>The input string.</p>
6080
     * @param array  $needles        <p>SubStrings to look for.</p>
6081
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6082
     *
6083
     * @psalm-pure
6084
     *
6085
     * @return bool
6086
     *              <p>Whether or not $haystack contains any of the $needles.</p>
6087
     */
6088 46
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // by-value iteration: the needles are only read
        foreach ($needles as $needle) {
            // Empty and non-scalar needles are skipped. The check is done on the
            // string value, so that the needle "0" is searched for instead of
            // being skipped by a truthiness test.
            if (!\is_scalar($needle) || (string) $needle === '') {
                continue;
            }

            $needle = (string) $needle;

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                // the first hit is enough
                return true;
            }
        }

        return false;
    }
6118
6119
    /**
6120
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
6121
     * inserted before uppercase characters (with the exception of the first
6122
     * character of the string), and in place of spaces as well as underscores.
6123
     *
6124
     * @param string $str      <p>The input string.</p>
6125
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6126
     *
6127
     * @psalm-pure
6128
     *
6129
     * @return string
6130
     */
6131 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // "dasherize" is "delimit" with a fixed '-' delimiter
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6135
6136
    /**
6137
     * Returns a lowercase and trimmed string separated by the given delimiter.
6138
     * Delimiters are inserted before uppercase characters (with the exception
6139
     * of the first character of the string), and in place of spaces, dashes,
6140
     * and underscores. Alpha delimiters are not converted to lowercase.
6141
     *
6142
     * @param string      $str                           <p>The input string.</p>
6143
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
6144
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
6145
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
6146
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
6147
     *                                                   tr</p>
6148
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
6149
     *                                                   ß</p>
6150
     *
6151
     * @psalm-pure
6152
     *
6153
     * @return string
6154
     */
6155 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // With "mbstring" the mb_ereg functions are used, otherwise the same patterns run through PCRE.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) put a dash in front of every upper-case letter that is not at a word boundary
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lower-case the string; this happens before the delimiter is inserted, so the delimiter keeps its case
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // 3.) every run of dashes, underscores and whitespace becomes one delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6189
6190
    /**
6191
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
6192
     *
6193
     * EXAMPLE: <code>
6194
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
6195
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
6196
     * </code>
6197
     *
6198
     * @param string $str <p>The input string.</p>
6199
     *
6200
     * @psalm-pure
6201
     *
6202
     * @return false|string
6203
     *                      <p>
6204
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
6205
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
6206
     *                      </p>
6207
     */
6208 31
    public static function str_detect_encoding($str)
    {
        // the parameter is untyped, so work on its string value
        $str = (string) $str;

        //
        // 1.) binary strings (010001001...): only UTF-32 / UTF-16 get a name, everything else (PDF / Images / ...) is unknown
        //

        if (self::is_binary($str, true)) {
            $utf32_variant = self::is_utf32($str, false);
            if ($utf32_variant === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_variant === 2) {
                return 'UTF-32BE';
            }

            $utf16_variant = self::is_utf16($str, false);
            if ($utf16_variant === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_variant === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()" with a fixed list of candidates
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidate_encodings = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) last resort: the first encoding that "iconv()" can round-trip the string through unchanged
        //

        if (self::$ENCODINGS === null) {
            // the list of encodings is loaded on first use
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
6320
6321
    /**
6322
     * alias for "UTF8::str_ends_with()"
6323
     *
6324
     * @param string $haystack
6325
     * @param string $needle
6326
     *
6327
     * @psalm-pure
6328
     *
6329
     * @return bool
6330
     *
6331
     * @see        UTF8::str_ends_with()
6332
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
6333
     */
6334 1
    public static function str_ends(string $haystack, string $needle): bool
    {
        // deprecated alias: the check itself is done by str_ends_with()
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6338
6339
    /**
6340
     * Check if the string ends with the given substring.
6341
     *
6342
     * EXAMPLE: <code>
6343
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6344
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
6345
     * </code>
6346
     *
6347
     * @param string $haystack <p>The string to search in.</p>
6348
     * @param string $needle   <p>The substring to search for.</p>
6349
     *
6350
     * @psalm-pure
6351
     *
6352
     * @return bool
6353
     */
6354 9
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        // ... but an empty string ends with nothing else
        if ($haystack === '') {
            return false;
        }

        // before PHP 8 the last bytes are compared by hand
        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
6371
6372
    /**
6373
     * Returns true if the string ends with any of $substrings, false otherwise.
6374
     *
6375
     * - case-sensitive
6376
     *
6377
     * @param string   $str        <p>The input string.</p>
6378
     * @param string[] $substrings <p>Substrings to look for.</p>
6379
     *
6380
     * @psalm-pure
6381
     *
6382
     * @return bool
6383
     *              <p>Whether or not $str ends with $substring.</p>
6384
     */
6385 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // byte-wise comparison of the tail of $str with each candidate; the first hit wins
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
6399
6400
    /**
6401
     * Ensures that the string begins with $substring. If it doesn't, it's
6402
     * prepended.
6403
     *
6404
     * @param string $str       <p>The input string.</p>
6405
     * @param string $substring <p>The substring to add if not present.</p>
6406
     *
6407
     * @psalm-pure
6408
     *
6409
     * @return string
6410
     */
6411 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        // An empty $substring never counts as "already there"; prepending it changes nothing anyway.
        $has_prefix = $substring !== ''
            && \strncmp($str, $substring, \strlen($substring)) === 0;

        if ($has_prefix) {
            return $str;
        }

        return $substring . $str;
    }
6423
6424
    /**
     * Makes sure the string ends with $substring, appending it when it is missing.
     *
     * The check is case-sensitive and byte-based.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix that must be present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $already_terminated = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        if ($already_terminated) {
            return $str;
        }

        return $str . $substring;
    }
6448
6449
    /**
     * Turns an identifier-like string into a human readable one:
     * removes "_id", converts the remaining underscores to spaces, trims
     * the result and passes it through self::ucfirst().
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // Order matters: "_id" has to be stripped before "_" is turned into a space.
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($with_spaces));
    }
6475
6476
    /**
     * Deprecated alias that forwards to "UTF8::str_istarts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with = self::str_istarts_with($haystack, $needle);

        return $starts_with;
    }
6493
6494
    /**
     * Deprecated alias that forwards to "UTF8::str_iends_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with = self::str_iends_with($haystack, $needle);

        return $ends_with;
    }
6511
6512
    /**
     * Case-insensitive check whether $haystack ends with $needle.
     *
     * The trailing \strlen($needle) bytes of $haystack are compared with
     * $needle via self::strcasecmp(). An empty $needle always matches.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6539
6540
    /**
     * Checks whether the string ends with at least one of the given substrings.
     *
     * - case-insensitive (each candidate is tested via self::str_iends_with())
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Candidate endings to test.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> as soon as one candidate matches,
     *              <strong>false</strong> otherwise (also for an empty list).</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // An empty list never enters the loop and therefore yields false.
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6567
6568
    /**
     * Deprecated wrapper: forwards all arguments unchanged to "UTF8::stripos()"
     * (case-insensitive search for the first occurrence of $needle).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
6599
6600
    /**
     * Deprecated wrapper: forwards all arguments unchanged to "UTF8::strripos()"
     * (case-insensitive search for the last occurrence of $needle).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
6632
6633
    /**
     * Deprecated wrapper: forwards all arguments unchanged to "UTF8::strpos()"
     * (search for the first occurrence of $needle).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
6664
6665
    /**
     * Deprecated wrapper: forwards all arguments unchanged to "UTF8::strrpos()"
     * (search for the last occurrence of $needle).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
6697
6698
    /**
     * Inserts $substring into the string at the character position $index.
     *
     * If $index is greater than the character length of $str, the string is
     * returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_" functions directly
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
6739
6740
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search string is turned into a quoted, case-insensitive unicode
     * regular expression and applied via <function>preg_replace</function>.
     * The replacement is always inserted literally: "$1", "\1" and similar
     * sequences are NOT treated as back-references.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     An empty search string never matches.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements. NOTE: the result of
     *                         preg_replace() is passed through unchanged, so a PCRE error
     *                         (e.g. malformed UTF-8 input) surfaces as <strong>null</strong>.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Build one pattern per search string (keys and order are kept as given).
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // a pattern that can never match, so an empty needle is a no-op
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }

        // preg_replace() interprets "$n", "${n}" and "\n" inside the replacement
        // as back-references; escape "\" and "$" so the text is inserted literally,
        // exactly like the native str_ireplace() would do.
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        if (\is_array($replacement)) {
            $replacement = \array_map($escape, $replacement);
        } else {
            $replacement = $escape($replacement);
        }

        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6807
6808
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * The prefix comparison is case-insensitive and done by PHP's byte-based
     * strncasecmp(). An empty $search results in $replacement being appended
     * to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Covers every empty-$search case, including an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);
        $has_prefix = \strncasecmp($str, $search, $search_length) === 0;

        return $has_prefix
            ? $replacement . \substr($str, $search_length)
            : $str;
    }
6843
6844
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * The suffix comparison is case-insensitive and done by PHP's byte-based
     * stripos(). An empty $search results in $replacement being appended to
     * $str. A $search that is longer than $str can never match and leaves
     * the string untouched.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Covers every empty-$search case, including an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // Guard: a negative offset below -strlen($str) makes stripos() throw a
        // ValueError on PHP >= 8 (and emit a warning on PHP 7).
        if ($search_length > $str_length) {
            return $str;
        }

        // Start the search where the suffix would have to begin, so only a
        // match at the very end of the string is possible.
        $tail_offset = $str_length - $search_length;
        if (\stripos($str, $search, $tail_offset) !== false) {
            return \substr($str, 0, $tail_offset) . $replacement;
        }

        return $str;
    }
6878
6879
    /**
     * Case-insensitive check whether $haystack starts with $needle.
     *
     * An empty $needle always matches; otherwise the result is true when
     * self::stripos() reports the needle at position 0.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6906
6907
    /**
     * Checks whether the string begins with at least one of the given substrings.
     *
     * - case-insensitive (each candidate is tested via self::str_istarts_with())
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Candidate beginnings to test.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> as soon as one candidate matches, <strong>false</strong>
     *              otherwise (also for an empty $str or an empty list).</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // An empty list never enters the loop and therefore yields false.
        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6938
6939
    /**
     * Gets the substring after the first (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset has to be computed in the same encoding that is used to
        // cut the string below, otherwise the character positions can differ.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6978
6979
    /**
     * Gets the substring after the last (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset has to be computed in the same encoding that is used to
        // cut the string below, otherwise the character positions can differ.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7018
7019
    /**
     * Gets the substring before the first (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset has to be computed in the same encoding that is used to
        // cut the string below, otherwise the character positions can differ.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7050
7051
    /**
     * Gets the substring before the last (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // fast path for UTF-8: use the "mb_" functions directly
        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7087
7088
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The work is delegated to self::stristr(); its <strong>false</strong>
     * result ("not found") is converted into an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
7126
7127
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * The work is delegated to self::strrichr(); its <strong>false</strong>
     * result ("not found") is converted into an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
7165
7166
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned for an empty $str or when $n is zero or negative.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        $start = -$n;

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_" function directly
            return (string) \mb_substr($str, $start);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, $start, null, $normalized_encoding);
    }
7194
7195
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer
     * strings are cut so that the kept part plus $str_add_on is $length
     * characters long. If $str_add_on alone is longer than $length, nothing
     * of $str is kept and only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length to mb_substr(): it would cut from the
            // end of the string and produce a result longer than the limit.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7234
7235
    /**
     * Limit the number of characters in a string, cutting at a word boundary (space) where possible.
     *
     * Strings that already fit into $length are returned unchanged. Otherwise
     * the string is cut at $length characters and the trailing, possibly
     * incomplete word is dropped before $str_add_on is appended. If no space
     * is left to cut at, the first $length - 1 characters are kept instead.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit falls exactly on a space: cut right before it
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);

            // a negative limit makes explode() drop the last (incomplete) word
            $complete_words = \implode(' ', \explode(' ', $truncated, -1));

            if ($complete_words === '') {
                return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
            }

            return $complete_words . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: cut right before it
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '' . $str_add_on;
        }

        // a negative limit makes explode() drop the last (incomplete) word
        $complete_words = \implode(' ', \explode(' ', $truncated, -1));

        if ($complete_words === '') {
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $complete_words . $str_add_on;
    }
7303
7304
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the start until
     * the first difference or until the shorter string is exhausted.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_" functions directly
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );

            for ($pos = 0; $pos < $limit; ++$pos) {
                $char = \mb_substr($str1, $pos, 1);

                if ($char === false || $char !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = self::substr($str1, $pos, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
7367
7368
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // decided after normalization, so e.g. an alias that normalizes to
        // "UTF-8" also takes the plain "mb_*" path
        $use_mb = $encoding === 'UTF-8';

        // split $str2 into characters once, instead of once per table cell
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based end position of that match inside $str1

        // Only the previous row of the DP table is ever read, so keep two
        // rows instead of the full ($str_length + 1) x ($other_length + 1) grid.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            // loop-invariant for the inner loop
            $str_char = $use_mb
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // strict ">" keeps the first match on ties
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7458
7459
    /**
     * Returns the longest common suffix of $str1 and $str2.
     *
     * The strings are compared character by character from the end; the
     * result is everything after the last position where they differ.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // never look past the start of the shorter string
            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            for ($back = 1; $back <= $limit; ++$back) {
                $current = self::substr($str1, -$back, 1, $encoding);

                if (
                    $current === false
                    ||
                    $current !== self::substr($str2, -$back, 1, $encoding)
                ) {
                    return $suffix;
                }

                // walking backwards, so prepend
                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        $limit = (int) \min(
            \mb_strlen($str1, $encoding),
            \mb_strlen($str2, $encoding)
        );

        for ($back = 1; $back <= $limit; ++$back) {
            $current = \mb_substr($str1, -$back, 1);

            if (
                $current === false
                ||
                $current !== \mb_substr($str2, -$back, 1)
            ) {
                return $suffix;
            }

            // walking backwards, so prepend
            $suffix = $current . $suffix;
        }

        return $suffix;
    }
7525
7526
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (UTF-8) modifier, so it must be given without delimiters.
     * NOTE(review): a pattern containing an unescaped "/" would terminate the
     * expression early - callers presumably have to escape it themselves.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // preg_match() yields 1 on a match, 0 on no match and false on error
        return \preg_match("/{$pattern}/u", $str) === 1;
    }
7541
7542
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // negative offsets count from the end: -1 is the last character,
        // so -$char_count is still valid while -($char_count + 1) is not
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
7567
7568
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the same encoding that is used to fetch the
        // character below, so the bounds check and the lookup cannot disagree.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7600
7601
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * The input is returned unchanged if $pad_length is 0, if $pad_string is
     * empty or if $pad_length is smaller than the length of $str. With
     * STR_PAD_BOTH and an odd number of missing characters, the extra
     * character goes to the right side.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // plain "mb_*" calls for the default encoding, class helpers otherwise
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        // number of characters that are missing, split between both sides
        $diff = $pad_length - $str_length;

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $pre_length = $diff;
                $post_length = 0;

                break;

            case \STR_PAD_BOTH:
                $pre_length = (int) \floor($diff / 2);
                $post_length = (int) \ceil($diff / 2);

                break;

            case \STR_PAD_RIGHT:
            default:
                $pre_length = 0;
                $post_length = $diff;
        }

        $ps_length = $use_mb
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);

        // guard the division below in case the length could not be determined
        if ($ps_length < 1) {
            $ps_length = 1;
        }

        // Builds exactly $length characters of padding: repeat the pad string
        // just often enough to cover $length, then cut off the overhang.
        $make_padding = static function (int $length) use ($pad_string, $ps_length, $use_mb, $encoding): string {
            if ($length <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($length / $ps_length));

            return $use_mb
                ? (string) \mb_substr($repeated, 0, $length)
                : (string) self::substr($repeated, 0, $length, $encoding);
        };

        return $make_padding($pre_length) . $str . $make_padding($post_length);
    }
7768
7769
    /**
     * Pads both sides of the string up to the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7797
7798
    /**
     * Pads the beginning of the string up to the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7826
7827
    /**
     * Pads the end of the string up to the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7855
7856
    /**
     * Repeat a string.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7885
7886
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7945
7946
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * If $str does not start with $search, it is returned unchanged. An
     * empty $search never matches a prefix; in that case $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // covers every empty-$search combination, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_bytes = \strlen($search);

        // byte-wise prefix comparison; an empty $str can never match here
        if (\strncmp($str, $search, $prefix_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_bytes);
    }
7984
7985
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * If $str does not end with $search (which includes $search being longer
     * than $str), it is returned unchanged. An empty $search never matches a
     * suffix; in that case $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // covers every empty-$search combination, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A needle longer than the haystack cannot be its suffix. Checking
        // this up front also avoids an out-of-range negative offset, which
        // triggers a warning in PHP 7.1+ and a ValueError in PHP 8.
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // byte-wise suffix comparison
        if (\substr($str, -$search_length) === $search) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
8022
8023
    /**
     * Replace the first occurrence of the "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        // swap out exactly as many characters as the search term is long
        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $first_pos,
            (int) self::strlen($search)
        );
    }
8057
8058
    /**
     * Replace the last occurrence of the "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        // swap out exactly as many characters as the search term is long
        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $last_pos,
            (int) self::strlen($search)
        );
    }
8091
8092
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // plain "mb_*" calls for the default encoding, class helpers otherwise
        $use_mb = $encoding === 'UTF-8';

        if ($use_mb) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
        }

        // range(0, -1) would yield [0, -1] instead of an empty list
        if ($length < 1) {
            return '';
        }

        // shuffle the character positions, then rebuild the string from them
        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // init
        $shuffled_str = '';

        foreach ($indexes as $index) {
            $tmp_sub_str = $use_mb
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
8141
8142
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * A non-negative $end that is not greater than $start yields an empty string.
     *
     * @param string   $str      <p>The input string.</p>
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // plain "mb_*" calls for the default encoding, class helpers otherwise
        $use_mb = $encoding === 'UTF-8';

        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // an end index at or before the start selects nothing
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            $total = $use_mb
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            // no end: take everything; negative end: count back from the end
            $length = $end === null
                ? $total
                : $total + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($use_mb) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
8193
8194
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Hyphens and whitespace become "_", every uppercase letter is lower-cased and
     * prefixed with "_", digits that survive an int round-trip are wrapped in "_",
     * and repeated as well as leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // no "|" inside the character class: there it is a literal pipe, not an alternation,
            // and would make every "|" in the input behave like an uppercase letter
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];

                // a digit that round-trips through int is kept and separated on both sides
                if ((string) (int) $match === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // NOTE: a separate "trim leading & trailing whitespace" pattern is not needed here,
        //       because every whitespace run has already been turned into "_" by the first pattern
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            '_',
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8263
8264
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses repeated code points into a single entry
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8294
8295
    /**
     * Convert each string of an array to an array of Unicode characters.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input, the keys of $input are kept.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $chunks = [];

        // iterate by value: a by-reference loop would leave a dangling reference
        // on the last element of the returned array
        foreach ($input as $key => $value) {
            $chunks[$key] = self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        return $chunks;
    }
8333
8334
    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        $use_mbstring = $try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true;

        // fast path: the native function already does the chunking by $length
        if ($use_mbstring && \function_exists('mb_str_split')) {
            /**
             * @psalm-suppress ImpureFunctionCall - why?
             */
            $native_chunks = \mb_str_split($input, $length);
            if ($native_chunks !== false) {
                return $native_chunks;
            }
        }

        $chars = [];

        if ($use_mbstring) {
            $char_count = \mb_strlen($input);
            if ($char_count <= 127) {
                // short input: pick the characters one by one
                for ($offset = 0; $offset < $char_count; ++$offset) {
                    $chars[] = \mb_substr($input, $offset, 1);
                }
            } else {
                $matches = [];
                \preg_match_all('/./us', $input, $matches);
                $chars = $matches[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $input, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand

            $byte_count = \strlen($input);
            $pos = 0;

            while ($pos < $byte_count) {
                $lead = $input[$pos];

                // the lead byte announces how many bytes the sequence needs (0 === not a lead byte)
                if (($lead & "\x80") === "\x00") {
                    $width = 1;
                } elseif (($lead & "\xE0") === "\xC0") {
                    $width = 2;
                } elseif (($lead & "\xF0") === "\xE0") {
                    $width = 3;
                } elseif (($lead & "\xF8") === "\xF0") {
                    $width = 4;
                } else {
                    $width = 0;
                }

                // the whole sequence must fit into the input ...
                $is_complete = $width > 0 && $pos + $width <= $byte_count;

                // ... and every following byte must be a continuation byte (10xxxxxx)
                for ($k = 1; $is_complete && $k < $width; ++$k) {
                    $is_complete = ($input[$pos + $k] & "\xC0") === "\x80";
                }

                if ($is_complete) {
                    $chars[] = \substr($input, $pos, $width);
                    $pos += $width;
                } else {
                    // skip the single offending byte and try again at the next one
                    ++$pos;
                }
            }
        }

        if ($length > 1) {
            return \array_map(
                static function (array $group): string {
                    return \implode('', $group);
                },
                \array_chunk($chars, $length)
            );
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
8494
8495
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings, or an empty array if the pattern could not be applied.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split" reports an unusable pattern with false, which must not leak
            // through the "array" return type
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit pieces, the rest is dropped
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // ask for one extra piece, so that the unsplit remainder can be dropped again below
        $preg_limit = $limit > 0 ? $limit + 1 : -1;

        // The pattern is a regex (as in the "mb_split" branch), so it must not be quoted.
        // A control character is used as delimiter, so "/" and "#" inside the pattern
        // need no escaping.
        $delimiter = "\x01";
        $parts = \preg_split($delimiter . $pattern . $delimiter . 'u', $str, $preg_limit);

        if ($parts === false) {
            return [];
        }

        if ($preg_limit > 0 && \count($parts) === $preg_limit) {
            \array_pop($parts);
        }

        return $parts;
    }
8559
8560
    /**
     * Check if the string starts with the given substring (byte-wise, case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // an empty needle is a prefix of every string, even of an empty one
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8592
8593
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // read-only loop: iterate by value, a reference is neither needed nor cleaned up afterwards
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8624
8625
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The text behind the separator, or an empty string if $str or $separator
     *                is empty or the separator was not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class helper (like the other "str_substr_*_separator" methods) instead of
        // calling "mb_substr" directly with the raw, un-normalized encoding
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8666
8667
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The text behind the last separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $position = \mb_strrpos($str, $separator);

            return $position === false
                ? ''
                : (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
        }

        $position = self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        // continue right behind the separator
        $tail_start = $position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tail_start, null, $encoding);
    }
8711
8712
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The text in front of the first separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $position = \mb_strpos($str, $separator);

            // the separator position is also the length of the part in front of it
            return $position === false ? '' : (string) \mb_substr($str, 0, $position);
        }

        $position = self::strpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
8757
8758
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The text in front of the last separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $position = \mb_strrpos($str, $separator);

            // the separator position is also the length of the part in front of it
            return $position === false ? '' : (string) \mb_substr($str, 0, $position);
        }

        $position = self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        // the encoding name is only normalized for the final extraction
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $position, $normalized_encoding);
    }
8802
8803
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string if $str or $needle is empty or
     *                the needle was not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is also the default of the third "mb_strstr" argument, so the flag can be
        // handed over as it is
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8849
8850
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string if $str or $needle is empty or
     *                the needle was not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is also the default of the third "mb_strrchr" argument, so the flag can be
        // handed over as it is
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8896
8897
    /**
     * Wraps $str into the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8912
8913
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that are neither whitespace nor listed in
     * $word_define_chars; the rest of each word is lower-cased.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions know nothing about language specific rules
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // explicit comparison instead of a truthiness check: the string "0" is falsy,
        // but it is a perfectly valid separator char
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are kept exactly as they are
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
9010
9011
    /**
     * Convert a string into an obfuscated string.
     *
     * Characters are picked at random until the requested share of the string has
     * been processed. A picked character is replaced by <code>$obfuscateChar</code>,
     * unless it is listed in <code>$keepChars</code> (it then still counts towards
     * the share, but stays readable).
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the characters to process, e.g. 0.5 for 50%.
     *                                Values above 1.0 behave like 1.0.</p>
     * @param string   $obfuscateChar <p>The replacement character.</p>
     * @param string[] $keepChars     <p>Characters that are never replaced.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscate string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Swap existing occurrences of the obfuscate char for a placeholder while
        // working on the string; they are swapped back right before returning.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);
        // Clamp the quota to the number of available characters: once every key is
        // marked as done the counter cannot grow any more, so a larger quota
        // (e.g. $percent > 1) would keep the outer loop spinning forever.
        $charsMaxChange = (int) \min($charsMax, \round($charsMax * $percent));
        $charsCounter = 0;
        $charKeyDone = [];

        // Repeat random passes over the string until the quota is reached.
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // skip this character for the current pass in roughly half of the cases
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // kept characters use up quota but are not replaced
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
9077
9078
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. These words are handled like the built-in "small words"
     * (a, an, and, ...): they are lower-cased inside the title but still
     * capitalized at the start / end of the title or of a sub-phrase.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The words are matched literally,
     *                         regex meta characters are escaped.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments (not plain words): some entries carry look-arounds / optional dots
        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        // User supplied words end up inside the "~...~" patterns below, so they have
        // to be escaped: otherwise a word containing a meta character or the
        // delimiter breaks the pattern and the whole title is lost.
        foreach ($ignore as $ignore_word) {
            $ignore_word = (string) $ignore_word;
            if ($ignore_word === '') {
                // an empty alternative would match everywhere
                continue;
            }

            $small_words[] = \preg_quote($ignore_word, '~');
        }

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an all-caps (or caseless) input is lower-cased first, so that the
        // substitutions below can rebuild the casing
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in the pattern below tests for "…" although its
        // inline comment talks about a hyphen - confirm the intent before touching it.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
9269
9270
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big binary number, so leading
     * zero bits are not part of the result (e.g. "a" => "1100001") and an empty
     * string results in "0".
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array<int, int>|false $bytes - needed for PhpStan (stubs error) */
        $bytes = \unpack('C*', $str);
        if ($bytes === false) {
            return false;
        }

        // Convert byte by byte: pushing the whole hex string through base_convert()
        // silently loses precision as soon as the value no longer fits into an integer.
        $bits = '';
        foreach ($bytes as $byte) {
            $bits .= \sprintf('%08b', $byte);
        }

        // keep the number-like output of the former implementation
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9293
9294
    /**
     * Split a string into an array of lines.
     *
     * INFO: a run of one or two "\r" / "\n" characters is treated as one line break.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split" (empty input or a failed split)
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
9334
9335
    /**
     * Convert a string into an array of words.
     *
     * The string is split around its words and the words themselves are kept, so
     * the unfiltered result alternates between "text between words" and "word".
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split" (empty input or a failed split)
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // character class of everything that belongs to a word: letters + $char_list
        $word_chars = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$word_chars}+(?:[\p{Pd}’']{$word_chars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothing;
        }

        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        // make sure that the filtered values are handed out as strings
        return \array_map(
            static function ($part): string {
                return (string) $part;
            },
            self::reduce_string_array($parts, $remove_empty_values, $remove_short_values)
        );
    }
9386
9387
    /**
     * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9408
9409
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: if the substring alone is already as long as (or longer than) $length,
     * nothing of the original string is kept and only the substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_*" functions directly
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring. Never go below zero: a negative
                // length means "cut from the end" for mb_substr() and would return
                // a string that is longer than requested.
                $length = \max(0, $length - (int) \mb_strlen($substring));
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 branch
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9469
9470
    /**
     * Truncates the string to a given length without cutting through a word.
     * If $substring is provided, and truncating occurs, the string is shortened
     * further so that the substring can be appended without exceeding the
     * desired length.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        // "UTF-8" is served by the plain "mb_*" functions, everything else by the
        // encoding aware helpers of this class
        $native = $encoding === 'UTF-8';
        if (!$native) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $native ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
        if ($length >= $str_length) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= ($native ? (int) \mb_strlen($substring) : (int) self::strlen($substring, $encoding));
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = $native
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // first space at or behind the last kept character
        $space_position = $native
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);

        if ($space_position === $length) {
            // the cut is directly followed by a space, so no word was split
            return $truncated . $substring;
        }

        // the last word was truncated: find the last space inside of the kept part
        $last_position = $native
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        $cut_back = $last_position !== false
                    ||
                    ($space_position !== false && !$ignore_do_not_split_words_for_one_word);

        if ($cut_back) {
            // get everything up to that space (nothing at all if there is none)
            $truncated = $native
                ? (string) \mb_substr($truncated, 0, (int) $last_position)
                : (string) self::substr($truncated, 0, (int) $last_position, $encoding);
        }

        return $truncated . $substring;
    }
9576
9577
    /**
     * Returns the string in its underscore-delimited form.
     *
     * This is "UTF8::str_delimit()" with "_" as the delimiter: the result is
     * documented as lowercase and trimmed, with underscores inserted before
     * uppercase characters (except the first character of the string) and in
     * place of spaces and dashes.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
9594
9595
    /**
     * Returns an UpperCamelCase version of the supplied string.
     *
     * The string is camelized via "UTF8::str_camelize()" first, afterwards its
     * first character is upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9622
9623
    /**
     * Alias of "UTF8::ucfirst()": all arguments are passed through unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
9654
9655
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string.</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // The parts are read as: index 0 = text before the first word, odd indexes =
        // the words, even indexes = the text between / behind the words.
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        if ($format !== 1 && $format !== 2) {
            // only the amount of words is requested
            return (int) (($total - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $total; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: key every word by its character offset in the input
        $position = (int) self::strlen($parts[0]);
        $index = 1;
        while ($index < $total) {
            $words[$position] = $parts[$index];
            // move on by the word itself plus the text that follows it
            $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
            $index += 2;
        }

        return $words;
    }
9712
9713
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are
     * case-folded via "UTF8::strtocasefold()" before they are compared.
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9755
9756
    /**
     * Alias of "UTF8::strstr()": all arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
9787
9788
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are brought into the Unicode normalization form "NFD" before
     * they are compared byte by byte, so different encodings of the same
     * character compare as equal. A string that cannot be normalized (e.g.
     * invalid UTF-8) is compared as it is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical bytes are always equal, no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        // The normalizer reports a failure with a non-string result; passing that
        // on to \strcmp() would end in a TypeError (strict types), so the
        // original string is used instead.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if (!\is_string($normalized1)) {
            $normalized1 = $str1;
        }

        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if (!\is_string($normalized2)) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
9814
9815
    /**
     * Find length of initial segment not matching mask.
     *
     * Returns the number of characters at the start of the string (or of the
     * part selected via $offset / $length) that come before the first
     * character of $char_list.
     *
     * @param string   $str
     * @param string   $char_list
     * @param int      $offset
     * @param int|null $length
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask nothing can match: the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // work on the requested part of the string only
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $segment = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $segment = \mb_substr($str, $offset);
            } else {
                $segment = \mb_substr($str, $offset, $length);
            }

            if ($segment === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $segment;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first mask character
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no character of the mask in the string
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9878
9879
    /**
9880
     * alias for "UTF8::stristr()"
9881
     *
9882
     * @param string $haystack
9883
     * @param string $needle
9884
     * @param bool   $before_needle
9885
     * @param string $encoding
9886
     * @param bool   $clean_utf8
9887
     *
9888
     * @psalm-pure
9889
     *
9890
     * @return false|string
9891
     *
9892
     * @see        UTF8::stristr()
9893
     * @deprecated <p>please use "UTF8::stristr()"</p>
9894
     */
9895 1
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Deprecated alias: every argument is handed to stristr() unchanged.
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9910
9911
    /**
9912
     * Create a UTF-8 string from code points.
9913
     *
9914
     * INFO: opposite to UTF8::codepoints()
9915
     *
9916
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
9917
     *
9918
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
9919
     *
9920
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
9921
     *
9922
     * @psalm-pure
9923
     *
9924
     * @return string
9925
     *                <p>A UTF-8 encoded string.</p>
9926
     */
9927 4
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // A single value is treated like a one-element list.
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // Turn each value into a decimal numeric character reference ...
        $entities = \array_map(
            static function ($code_point): string {
                return '&#' . (int) $code_point . ';';
            },
            $code_points
        );

        // ... and let the entity decoder produce the characters.
        return self::html_entity_decode(\implode('', $entities), \ENT_QUOTES | \ENT_HTML5);
    }
9944
9945
    /**
9946
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
9947
     *
9948
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
9949
     *
9950
     * @param string $str <p>The input string.</p>
9951
     *
9952
     * @psalm-pure
9953
     *
9954
     * @return bool
9955
     *              <p>
9956
     *              <strong>true</strong> if the string has BOM at the start,<br>
9957
     *              <strong>false</strong> otherwise
9958
     *              </p>
9959
     */
9960 6
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: the loop only reads the BOM table, and a
        // by-reference loop variable would bind into the shared static array.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // Compare only as many leading bytes as this BOM variant is long.
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9971
9972
    /**
9973
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
9974
     *
9975
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
9976
     *
9977
     * @see http://php.net/manual/en/function.strip-tags.php
9978
     *
9979
     * @param string      $str            <p>
9980
     *                                    The input string.
9981
     *                                    </p>
9982
     * @param string|null $allowable_tags [optional] <p>
9983
     *                                    You can use the optional second parameter to specify tags which should
9984
     *                                    not be stripped.
9985
     *                                    </p>
9986
     *                                    <p>
9987
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
9988
     *                                    can not be changed with allowable_tags.
9989
     *                                    </p>
9990
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9991
     *
9992
     * @psalm-pure
9993
     *
9994
     * @return string
9995
     *                <p>The stripped string.</p>
9996
     */
9997 4
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // Optionally clean the input before the tags are removed.
        $subject = $clean_utf8 ? self::clean($str) : $str;

        // Only pass the allow-list on when the caller actually supplied one.
        return $allowable_tags === null
            ? \strip_tags($subject)
            : \strip_tags($subject, $allowable_tags);
    }
10016
10017
    /**
10018
     * Strip all whitespace characters. This includes tabs and newline
10019
     * characters, as well as multibyte whitespace such as the thin space
10020
     * and ideographic space.
10021
     *
10022
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
10023
     *
10024
     * @param string $str
10025
     *
10026
     * @psalm-pure
10027
     *
10028
     * @return string
10029
     */
10030 36
    public static function strip_whitespace(string $str): string
    {
        // Nothing to strip from an empty string; otherwise delete every run
        // of characters matched by the [[:space:]] class.
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
10038
10039
    /**
10040
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10041
     *
10042
     * INFO: use UTF8::stripos_in_byte() for the byte-length
10043
     *
10044
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
10045
     *
10046
     * @see http://php.net/manual/en/function.mb-stripos.php
10047
     *
10048
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10049
     * @param string $needle     <p>The string to find in haystack.</p>
10050
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
10051
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10052
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10053
     *
10054
     * @psalm-pure
10055
     *
10056
     * @return false|int
10057
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
10058
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
10059
     */
10060 25
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty haystack: only on PHP >= 8 is an empty needle reported at position 0.
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        // Before PHP 8 an empty needle is reported as "not found".
        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // mbstring
        //

        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        if ($has_mbstring && $encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        // every remaining path works with the normalized encoding name
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($has_mbstring) {
            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        //
        // intl: "grapheme_stripos()" can't handle other encodings
        // and can't handle a negative offset
        //

        $can_use_intl = $encoding === 'UTF-8'
            && $offset >= 0
            && self::$SUPPORT['intl'] === true;
        if ($can_use_intl) {
            $grapheme_pos = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // ascii only: the native function is sufficient
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
10128
10129
    /**
10130
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
10131
     *
10132
     * EXAMPLE: <code>
10133
     * $str = 'iñtërnâtiônàlizætiøn';
10134
     * $search = 'NÂT';
10135
     *
10136
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
10137
     * UTF8::stristr($str, $search, true); // 'iñtër'
10138
     * </code>
10139
     *
10140
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
10141
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
10142
     * @param bool   $before_needle [optional] <p>
10143
     *                              If <b>TRUE</b>, it returns the part of the
10144
     *                              haystack before the first occurrence of the needle (excluding the needle).
10145
     *                              </p>
10146
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10147
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10148
     *
10149
     * @psalm-pure
10150
     *
10151
     * @return false|string
10152
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
10153
     */
10154 13
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty haystack: only on PHP >= 8 does an empty needle count as a match.
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Empty needle: whole haystack on PHP >= 8, "not found" before that.
        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // mbstring
        //

        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        if ($has_mbstring && $encoding === 'UTF-8') {
            return \mb_stristr($haystack, $needle, $before_needle);
        }

        // every remaining path works with the normalized encoding name
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($has_mbstring) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // intl: "grapheme_stristr()" can't handle other encodings
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // ascii only: the native function is sufficient
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // vanilla php: capture everything in front of the first
        // case-insensitive occurrence of the needle
        //

        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        $before = $match[1] ?? null;
        if ($before === null) {
            return false;
        }

        if ($before_needle) {
            return $before;
        }

        // the remainder starts right after the captured prefix
        $prefix_length = (int) self::strlen($before, $encoding);

        return self::substr($haystack, $prefix_length, null, $encoding);
    }
10234
10235
    /**
10236
     * Get the string length, not the byte-length!
10237
     *
10238
     * INFO: use UTF8::strwidth() for the char-length
10239
     *
10240
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
10241
     *
10242
     * @see http://php.net/manual/en/function.mb-strlen.php
10243
     *
10244
     * @param string $str        <p>The string being checked for length.</p>
10245
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10246
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10247
     *
10248
     * @psalm-pure
10249
     *
10250
     * @return false|int
10251
     *                   <p>
10252
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
10253
     *                   $encoding.
10254
     *                   (One multi-byte character counted as +1).
10255
     *                   <br>
10256
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
10257
     *                   chars.
10258
     *                   </p>
10259
     */
10260 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // Only unknown encoding names have to go through the normalizer.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
                return @\mb_strlen($str, $encoding);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return @\mb_strlen($str);
        }

        //
        // binary || ascii only: one byte per character
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        // Warn when neither of the encoding-aware extensions is available.
        $cannot_handle_encoding = $encoding !== 'UTF-8'
            && self::$SUPPORT['mbstring'] === false
            && self::$SUPPORT['iconv'] === false;
        if ($cannot_handle_encoding) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // intl: "grapheme_strlen()" can't handle other encodings
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // ascii only: byte length equals character length
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // vanilla php: count the matches of "any single character"
        //

        \preg_match_all('/./us', $str, $parts);
        $char_count = \count($parts[0]);

        // No match at all for a non-empty string is reported as false.
        return $char_count === 0 ? false : $char_count;
    }
10365
10366
    /**
10367
     * Get string length in byte.
10368
     *
10369
     * @param string $str
10370
     *
10371
     * @psalm-pure
10372
     *
10373
     * @return int
10374
     */
10375 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // With the overload flag set, "mb_" is available, so count through
        // it with an 8-bit encoding; otherwise use the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10388
10389
    /**
10390
     * Case-insensitive string comparisons using a "natural order" algorithm.
10391
     *
10392
     * INFO: natural order version of UTF8::strcasecmp()
10393
     *
10394
     * EXAMPLES: <code>
10395
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
10396
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10397
     *
10398
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10399
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10400
     * </code>
10401
     *
10402
     * @param string $str1     <p>The first string.</p>
10403
     * @param string $str2     <p>The second string.</p>
10404
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10405
     *
10406
     * @psalm-pure
10407
     *
10408
     * @return int
10409
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
10410
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
10411
     *             <strong>0</strong> if they are equal
10412
     */
10413 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands, then defer to the natural-order comparison.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
10420
10421
    /**
10422
     * String comparisons using a "natural order" algorithm
10423
     *
10424
     * INFO: natural order version of UTF8::strcmp()
10425
     *
10426
     * EXAMPLES: <code>
10427
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
10428
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10429
     *
10430
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10431
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10432
     * </code>
10433
     *
10434
     * @see http://php.net/manual/en/function.strnatcmp.php
10435
     *
10436
     * @param string $str1 <p>The first string.</p>
10437
     * @param string $str2 <p>The second string.</p>
10438
     *
10439
     * @psalm-pure
10440
     *
10441
     * @return int
10442
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
10443
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
10444
     *             <strong>0</strong> if they are equal
10445
     */
10446 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Identical input needs no folding.
        if ($str1 === $str2) {
            return 0;
        }

        // Fold both operands before handing them to the native comparison.
        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
10457
10458
    /**
10459
     * Case-insensitive string comparison of the first n characters.
10460
     *
10461
     * EXAMPLE: <code>
10462
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
10463
     * </code>
10464
     *
10465
     * @see http://php.net/manual/en/function.strncasecmp.php
10466
     *
10467
     * @param string $str1     <p>The first string.</p>
10468
     * @param string $str2     <p>The second string.</p>
10469
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
10470
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10471
     *
10472
     * @psalm-pure
10473
     *
10474
     * @return int
10475
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10476
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10477
     *             <strong>0</strong> if they are equal
10478
     */
10479 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands, then compare their first $len characters.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
10491
10492
    /**
10493
     * String comparison of the first n characters.
10494
     *
10495
     * EXAMPLE: <code>
10496
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
10497
     * </code>
10498
     *
10499
     * @see http://php.net/manual/en/function.strncmp.php
10500
     *
10501
     * @param string $str1     <p>The first string.</p>
10502
     * @param string $str2     <p>The second string.</p>
10503
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
10504
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10505
     *
10506
     * @psalm-pure
10507
     *
10508
     * @return int
10509
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10510
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10511
     *             <strong>0</strong> if they are equal
10512
     */
10513 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Only unknown encoding names have to go through the normalizer.
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both operands down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare what is left.
        return self::strcmp($head1, $head2);
    }
10533
10534
    /**
10535
     * Search a string for any of a set of characters.
10536
     *
10537
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
10538
     *
10539
     * @see http://php.net/manual/en/function.strpbrk.php
10540
     *
10541
     * @param string $haystack  <p>The string where char_list is looked for.</p>
10542
     * @param string $char_list <p>This parameter is case-sensitive.</p>
10543
     *
10544
     * @psalm-pure
10545
     *
10546
     * @return false|string
10547
     *                      <p>The string starting from the character found, or false if it is not found.</p>
10548
     */
10549 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Find the first character of the haystack that belongs to the list.
        $hit = [];
        if (!\preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $hit)) {
            return false;
        }

        // Return the haystack from the first occurrence of the matched text.
        $start = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $start);
    }
10561
10562
    /**
10563
     * Find the position of the first occurrence of a substring in a string.
10564
     *
10565
     * INFO: use UTF8::strpos_in_byte() for the byte-length
10566
     *
10567
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
10568
     *
10569
     * @see http://php.net/manual/en/function.mb-strpos.php
10570
     *
10571
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10572
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
10573
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
10574
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10575
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10576
     *
10577
     * @psalm-pure
10578
     *
10579
     * @return false|int
10580
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
10581
     *                   string.<br> If needle is not found it returns false.
10582
     */
10583 52
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Early exit for an empty haystack, checked before $needle is converted.
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle,
        // so a code point is converted into its character first
        if (\is_int($needle)) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Same check again, now that $needle is guaranteed to be a string.
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        // Before PHP 8 an empty needle is reported as "not found".
        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack, but the
        // result must still be a position relative to its start (as with
        // "mb_strpos()"). Convert it into the equivalent absolute offset
        // before the haystack is cut; an offset that reaches past the
        // beginning is clamped to 0, i.e. the whole haystack is searched.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the cut haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position and
            // add the characters that were skipped by the offset
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10735
10736
    /**
     * Byte-wise search for the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being searched.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to look for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available whenever function overloading is in use,
        // so call it with an 8-bit charset; otherwise use the plain function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10768
10769
    /**
     * Byte-wise, case-insensitive search for the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being searched.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to look for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available whenever function overloading is in use,
        // so call it with an 8-bit charset; otherwise use the plain function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10801
10802
    /**
     * Returns the part of haystack before or from the last occurrence of needle.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack, or false if needle is not found
     *                      or if haystack or needle is an empty string.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_byte_encoding && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv or vanilla php
        //

        // both remaining fallbacks search for the first character of the needle only
        $first_char_tmp = self::substr($needle, 0, 1, $encoding);
        if ($first_char_tmp === false) {
            return false;
        }
        $first_char = (string) $first_char_tmp;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $first_char, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $first_char, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10932
10933
    /**
     * Reverses the order of the characters in a string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // emojis are encoded before reversing and decoded again on the result
        $str = self::emoji_encode($str, true);

        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $piece = self::substr($str, $index, 1, $encoding);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $piece = \grapheme_substr($str, $index, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $piece = \mb_substr($str, $index, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10990
10991
    /**
     * Returns the part of haystack before or from the last occurrence of needle, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack, or<br>false if needle is not found
     *                      or if haystack or needle is an empty string.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the fallback searches for the first character of the needle only
        $first_char_tmp = self::substr($needle, 0, 1, $encoding);
        if ($first_char_tmp === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, (string) $first_char_tmp, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
11071
11072
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted
     *                               via UTF8::chr() before searching.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // single empty-haystack guard (evaluated after the needle is normalized):
        // on PHP >= 8 an empty needle in an empty haystack yields 0, everything else is "not found"
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strripos($haystack, $needle, $offset);
            }

            return \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strripos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then delegate to the case-sensitive search
        $haystack = self::strtocasefold($haystack, true, false, $encoding);
        $needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($haystack, $needle, $offset, $encoding, $clean_utf8);
    }
11203
11204
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available whenever function overloading is in use,
        // so call it with an 8-bit charset; otherwise use the plain function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
11238
11239
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // single empty-haystack guard (evaluated after the needle is normalized):
        // on PHP >= 8 an empty needle in an empty haystack yields 0, everything else is "not found"
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the searched window; the requested encoding is
        // passed along so that the character offsets are interpreted in that charset.
        $haystack_tmp = null;
        if ($offset > 0) {
            $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystack_tmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystack_tmp !== null) {
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        // byte position of the last match inside the window
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // convert the byte position into a character position in the requested encoding
        return $offset + (int) self::strlen($str_tmp, $encoding);
    }
11398
11399
    /**
     * Byte-wise search for the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available whenever function overloading is in use,
        // so call it with an 8-bit charset; otherwise use the plain function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
11433
11434
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the examined part of the input string.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of the input string.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the leading segment made up of mask characters,<br>
     *                   0 if the (sliced) string or the mask is empty or nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // only slice the input when an offset and / or a length was requested
        $slice_requested = $offset || $length !== null;
        if ($slice_requested) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest run of mask characters anchored at the start of the string
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11481
11482
    /**
11483
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
11484
     *
11485
     * EXAMPLE: <code>
11486
     * $str = 'iñtërnâtiônàlizætiøn';
11487
     * $search = 'nât';
11488
     *
11489
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
11490
     * UTF8::strstr($str, $search, true); // 'iñtër'
11491
     * </code>
11492
     *
11493
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
11494
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
11495
     * @param bool   $before_needle [optional] <p>
11496
     *                              If <b>TRUE</b>, strstr() returns the part of the
11497
     *                              haystack before the first occurrence of the needle (excluding the needle).
11498
     *                              </p>
11499
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
11500
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
11501
     *
11502
     * @psalm-pure
11503
     *
11504
     * @return false|string
11505
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
11506
     */
11507 3
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty haystack: only PHP >= 8.0 combined with an empty needle yields '' here.
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Empty needle: the whole haystack on PHP >= 8.0, otherwise false.
        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl - INFO: "grapheme_strstr()" can't handle other encodings
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5) vanilla php: lazily capture everything in front of the first needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        $prefix = $match[1] ?? null;
        if ($prefix === null) {
            return false;
        }

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
11615
11616
    /**
11617
     * Finds first occurrence of a string within another.
11618
     *
11619
     * @param string $haystack      <p>
11620
     *                              The string from which to get the first occurrence
11621
     *                              of needle.
11622
     *                              </p>
11623
     * @param string $needle        <p>
11624
     *                              The string to find in haystack.
11625
     *                              </p>
11626
     * @param bool   $before_needle [optional] <p>
11627
     *                              Determines which portion of haystack
11628
     *                              this function returns.
11629
     *                              If set to true, it returns all of haystack
11630
     *                              from the beginning to the first occurrence of needle.
11631
     *                              If set to false, it returns all of haystack
11632
     *                              from the first occurrence of needle to the end,
11633
     *                              </p>
11634
     *
11635
     * @psalm-pure
11636
     *
11637
     * @return false|string
11638
     *                      <p>The portion of haystack,
11639
     *                      or false if needle is not found.</p>
11640
     */
11641 2
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // An empty haystack or needle is always reported as "not found".
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset;
        // otherwise the native byte-wise strstr() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
11657
11658
    /**
11659
     * Unicode transformation for case-less matching.
11660
     *
11661
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
11662
     *
11663
     * @see http://unicode.org/reports/tr21/tr21-5.html
11664
     *
11665
     * @param string      $str        <p>The input string.</p>
11666
     * @param bool        $full       [optional] <p>
11667
     *                                <b>true</b>, replace full case folding chars (default)<br>
11668
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
11669
     *                                </p>
11670
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11671
     * @param string      $encoding   [optional] <p>Set the charset.</p>
11672
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11673
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
11674
     *                                is for some languages better ...</p>
11675
     *
11676
     * @psalm-pure
11677
     *
11678
     * @return string
11679
     */
11680 32
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // run the input through self::clean() before folding
            $str = self::clean($str);
        }

        // Apply the case-folding replacements first, then convert the case of the result.
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // Without a language and with plain UTF-8 the native mbstring functions are called
        // directly; everything else is delegated to the sibling strtolower() / strtoupper().
        $use_native = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $use_native
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $use_native
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11714
11715
    /**
11716
     * Make a string lowercase.
11717
     *
11718
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
11719
     *
11720
     * @see http://php.net/manual/en/function.mb-strtolower.php
11721
     *
11722
     * @param string      $str                           <p>The string being lowercased.</p>
11723
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11724
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11725
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11726
     *                                                   tr</p>
11727
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11728
     *                                                   -> ß</p>
11729
     *
11730
     * @psalm-pure
11731
     *
11732
     * @return string
11733
     *                <p>String with all alphabetic characters converted to lowercase.</p>
11734
     */
11735 73
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped on purpose, so normalize it to a string first
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // run the input through self::clean() before converting
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language requested: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // a language was requested, but intl is missing: warn and fall back
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // load the transliterator list on first use
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11796
11797
    /**
11798
     * Make a string uppercase.
11799
     *
11800
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
11801
     *
11802
     * @see http://php.net/manual/en/function.mb-strtoupper.php
11803
     *
11804
     * @param string      $str                           <p>The string being uppercased.</p>
11805
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
11806
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11807
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11808
     *                                                   tr</p>
11809
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11810
     *                                                   -> ß</p>
11811
     *
11812
     * @psalm-pure
11813
     *
11814
     * @return string
11815
     *                <p>String with all alphabetic characters converted to uppercase.</p>
11816
     */
11817 17
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped on purpose, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // run the input through self::clean() before converting
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without a language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the transliterator list on first use
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl IS available in this branch, only the requested language is unknown:
                    // the message mirrors the one used by UTF8::strtolower()
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // a language was requested, but intl is missing: warn (naming the correct
            // method) and fall through to the generic conversion below
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11878
11879
    /**
11880
     * Translate characters or replace sub-strings.
11881
     *
11882
     * EXAMPLE:
11883
     * <code>
11884
     * $array = [
11885
     *     'Hello'   => '○●◎',
11886
     *     '中文空白' => 'earth',
11887
     * ];
11888
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
11889
     * </code>
11890
     *
11891
     * @see http://php.net/manual/en/function.strtr.php
11892
     *
11893
     * @param string          $str  <p>The string being translated.</p>
11894
     * @param string|string[] $from <p>The string replacing from.</p>
11895
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
11896
     *
11897
     * @psalm-pure
11898
     *
11899
     * @return string
11900
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
11901
     *                to the corresponding character in "to".</p>
11902
     */
11903 2
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical search and replacement: nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // split plain strings into one array entry per character
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // truncate the longer list, so that both lists have the same number of entries
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the result in its own variable: $from must stay intact,
            // otherwise the exception message below would only print "false"
            $replace_pairs = \array_combine($from, $to);
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $replace_pairs;
        }

        // only reachable with a string $from and $to === '': remove every occurrence of $from
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        // $from is a "search => replacement" map here
        return \strtr($str, $from);
    }
11946
11947
    /**
11948
     * Return the width of a string.
11949
     *
11950
     * INFO: use UTF8::strlen() for the byte-length
11951
     *
11952
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
11953
     *
11954
     * @param string $str        <p>The input string.</p>
11955
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11956
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11957
     *
11958
     * @psalm-pure
11959
     *
11960
     * @return int
11961
     */
11962 2
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        $is_fast_path_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_fast_path_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // 2) vanilla php: the counting below works on UTF-8 input
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // Strip every character from the listed ranges and let preg_replace() count them ...
        $wide_count = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // ... each stripped character counts as two columns, each remaining one as a single column.
        $narrow_count = (int) self::strlen($remaining);

        return (2 * $wide_count) + $narrow_count;
    }
12006
12007
    /**
12008
     * Get part of a string.
12009
     *
12010
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
12011
     *
12012
     * @see http://php.net/manual/en/function.mb-substr.php
12013
     *
12014
     * @param string   $str        <p>The string being checked.</p>
12015
     * @param int      $offset     <p>The first position used in str.</p>
12016
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
12017
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12018
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12019
     *
12020
     * @psalm-pure
12021
     *
12022
     * @return false|string
12023
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
12024
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
12025
     *                      characters long, <b>FALSE</b> will be returned.
12026
     */
12027 172
    public static function substr(
12028
        string $str,
12029
        int $offset = 0,
12030
        int $length = null,
12031
        string $encoding = 'UTF-8',
12032
        bool $clean_utf8 = false
12033
    ) {
12034
        // empty string
12035 172
        if ($str === '' || $length === 0) {
12036 8
            return '';
12037
        }
12038
12039 168
        if ($clean_utf8) {
12040
            // iconv and mbstring are not tolerant to invalid encoding
12041
            // further, their behaviour is inconsistent with that of PHP's substr
12042 2
            $str = self::clean($str);
12043
        }
12044
12045
        // whole string
12046 168
        if (!$offset && $length === null) {
12047 7
            return $str;
12048
        }
12049
12050 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
12051 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
12052
        }
12053
12054
        //
12055
        // fallback via mbstring
12056
        //
12057
12058 163
        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
12059 161
            if ($length === null) {
12060 64
                return \mb_substr($str, $offset);
12061
            }
12062
12063 102
            return \mb_substr($str, $offset, $length);
12064
        }
12065
12066
        //
12067
        // fallback for binary || ascii only
12068
        //
12069
12070
        if (
12071 4
            $encoding === 'CP850'
12072
            ||
12073 4
            $encoding === 'ASCII'
12074
        ) {
12075
            if ($length === null) {
12076
                return \substr($str, $offset);
12077
            }
12078
12079
            return \substr($str, $offset, $length);
12080
        }
12081
12082
        // otherwise we need the string-length
12083 4
        $str_length = 0;
12084 4
        if ($offset || $length === null) {
12085 4
            $str_length = self::strlen($str, $encoding);
12086
        }
12087
12088
        // e.g.: invalid chars + mbstring not installed
12089 4
        if ($str_length === false) {
12090
            return false;
12091
        }
12092
12093
        // empty string
12094 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
12095
            return '';
12096
        }
12097
12098
        // impossible
12099 4
        if ($offset && $offset > $str_length) {
12100
            return '';
12101
        }
12102
12103 4
        $length = $length ?? (int) $str_length;
12104
12105
        if (
12106 4
            $encoding !== 'UTF-8'
12107
            &&
12108 4
            self::$SUPPORT['mbstring'] === false
12109
        ) {
12110
            /**
12111
             * @psalm-suppress ImpureFunctionCall - is is only a warning
12112
             */
12113 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
12114
        }
12115
12116
        //
12117
        // fallback via intl
12118
        //
12119
12120
        if (
12121 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
12122
            &&
12123 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
12124
            &&
12125 4
            self::$SUPPORT['intl'] === true
12126
        ) {
12127
            $return_tmp = \grapheme_substr($str, $offset, $length);
12128
            if ($return_tmp !== false) {
12129
                return $return_tmp;
12130
            }
12131
        }
12132
12133
        //
12134
        // fallback via iconv
12135
        //
12136
12137
        if (
12138 4
            $length >= 0 // "iconv_substr()" can't handle negative length
12139
            &&
12140 4
            self::$SUPPORT['iconv'] === true
12141
        ) {
12142
            $return_tmp = \iconv_substr($str, $offset, $length);
12143
            if ($return_tmp !== false) {
12144
                return $return_tmp;
12145
            }
12146
        }
12147
12148
        //
12149
        // fallback for ascii only
12150
        //
12151
12152 4
        if (ASCII::is_ascii($str)) {
12153
            return \substr($str, $offset, $length);
12154
        }
12155
12156
        //
12157
        // fallback via vanilla php
12158
        //
12159
12160
        // split to array, and remove invalid characters
12161 4
        $array = self::str_split($str);
12162
12163
        // extract relevant part, and join to make sting again
12164 4
        return \implode('', \array_slice($array, $offset, $length));
12165
    }
12166
12167
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If null, $str1 is taken from
     *                                     $offset up to its end.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // Only cut the strings if a window was requested at all.
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // Compare against a prefix of $str2 that is as long as the extracted part of $str1.
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // The extracted part must be measured in the same encoding that is used to cut $str2,
                // otherwise the prefix length is counted in UTF-8 characters but applied in $encoding.
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12228
12229
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A length of 0 always results in 0 matches.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if $needle is empty or the length of
     *                   $haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty needle cannot be counted
        if ($needle === '') {
            return false;
        }

        // nothing to search in, or an empty search window
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid characters in front of the needle would shift the positions
            // reported by the "mb_" / "iconv_" functions, so strip them first
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // fallback via regex: every match-set is one occurrence
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $occurrences, \PREG_SET_ORDER);

        return \count($occurrences);
    }
12326
12327
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string. An empty haystack or needle results in 0.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // With "mbstring.func_overload" the window is cut out manually,
        // so that the "mb_" call below only has to count.
        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // (PHP_VERSION_ID is major * 10000 + minor * 100 + release, so 7.1.0 === 70100)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
12408
12409
    /**
     * Returns the number of occurrences of $substring in the given string.
     * The comparison is case-sensitive by default; pass false as $case_sensitive
     * to ignore the case.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences, 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                // bring both strings into the same case before counting
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // UTF-8: plain "mb_" functions are enough
        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
12456
12457
    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not
     *                start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        return (string) \mb_substr($haystack, (int) self::strlen($needle));
    }
12489
12490
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string,
     *                         null means "up to the end of the string".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. The result of the underlying
     *                      "substr()" / "mb_substr()" call is returned as-is, so it can be
     *                      <b>FALSE</b> if that call fails.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // No length given: let "substr()" read up to the end of the string
        // instead of capping the result with a hard-coded maximum length.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
12523
12524
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not
     *                end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
12556
12557
    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not
     *                start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        return (string) \mb_substr($haystack, (int) self::strlen($needle));
    }
12589
12590
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, the replacing
     *                                     ends at the end of the string. If it is not given and $str is an array,
     *                                     a length of zero is used for every element, i.e. the replacement is
     *                                     inserted at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is a string
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // Nothing to process: without this guard the padded scalar arguments below
            // would still contain one element and produce a bogus result entry.
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // the encoding: must be handed to every recursive call,
            // otherwise each element would silently be processed with the default encoding
            $encodings = \array_fill(0, $num, $encoding);

            // recursive call
            return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length, $encodings);
        }

        // a single input string only uses the first replacement
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end of the string
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12745
12746
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not
     *                end with the needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle at all?
        if (\substr($haystack, -\strlen($needle)) !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
12795
12796
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Byte-wise XOR: where the input equals its lower-case variant the upper-case bytes remain
        // and vice versa.
        // NOTE(review): this presumes that both variants have the same byte length as the input,
        // confirm for characters whose case mapping changes the byte length.
        return (string) ($lower ^ $upper ^ $str);
    }
12828
12829
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed if a function of an extension exists, although the
     * extension itself ("mbstring" or "iconv") is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12856
12857
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs replaced by spaces.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indentation = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indentation, $str);
    }
12877
12878
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // language specific rules or length preservation are delegated to "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12932
12933
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::to_ascii()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Passed on as the second argument of "UTF8::to_ascii()".</p>
     * @param bool   $strict    [optional] <p>Passed on as the third argument of "UTF8::to_ascii()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12954
12955
    /**
     * Deprecated alias: forwards the input unchanged to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12971
12972
    /**
     * Deprecated alias: forwards the input unchanged to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12988
12989
    /**
     * Deprecated alias: forwards the input unchanged to "UTF8::to_utf8()"
     * (html-entity decoding stays at its default, i.e. disabled).
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
13005
13006
    /**
     * Convert a string into ASCII.
     *
     * The conversion itself is delegated to "ASCII::to_transliterate()".
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
13027
13028
    /**
     * Interprets a scalar value as a boolean.
     *
     * Rules, applied in this order to the string representation of the input:
     * - the empty string is false
     * - "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false (case-insensitive)
     * - any other numeric string is true only if it is greater than zero
     * - everything else is true unless it is empty after "trim()"
     *
     * @param bool|int|string $str
     *
     * @phpstan-param bool|int|numeric-string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        //
        // All keywords are lowercase, so one lookup on the lowercased value
        // covers both the exact and the case-insensitive match.
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
13073
13074
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * The conversion itself is delegated to "ASCII::to_filename()".
     *
     * @param string $str               <p>The input string.</p>
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char     [optional] <p>Passed on as the third argument of "ASCII::to_filename()".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
13097
13098
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept);
     * a single value is cast to string and passed to "UTF8::utf8_decode()".
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
13126
13127
    /**
     * Deprecated alias: forwards the input unchanged to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
13143
13144
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * <li>Every value (a single string, or each element of an array) is converted via
     * "UTF8::to_utf8_string()"; array keys are kept.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $str */
            $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $str;
        }

        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        return $str;
    }
13185
13186
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * The string is processed in three steps: a byte-wise scan that copies byte
     * sequences which look like UTF-8 and converts all other bytes >= 0x80 via
     * "UTF8::to_utf8_convert_helper()", then decoding of "\uXXXX" escape sequences
     * (including surrogate pairs) and finally - only on request - html-entity decoding.
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $length; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                // a lone continuation byte (10xxxxxx) needs conversion, everything else is copied as it is
                $result .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;

                continue;
            }

            // how many continuation bytes the lead byte announces (0 === not a valid UTF-8 lead byte)
            if ($lead <= "\xDF") {
                $tail_length = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $tail_length = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $tail_length = 3; // looks like 4 bytes UTF8
            } else {
                $tail_length = 0; // doesn't look like UTF8, but should be converted
            }

            $sequence = $lead;
            $looks_like_utf8 = $tail_length > 0;
            for ($offset = 1; $looks_like_utf8 && $offset <= $tail_length; ++$offset) {
                // bytes behind the end of the string count as "\x00", which never is a continuation byte
                $next = $str[$pos + $offset] ?? "\x00";
                $looks_like_utf8 = $next >= "\x80" && $next <= "\xBF";
                $sequence .= $next;
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already
                $result .= $sequence;
                $pos += $tail_length;
            } else {
                // not valid UTF8 - convert the lead byte only, the following bytes are scanned on their own
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape sequence
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);
                    $code_point = 0x10000
                                  + (($high_surrogate - 0xD800) << 10)
                                  + ($low_surrogate - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
13319
13320
    /**
     * Returns the given string as an integer, or null if the string isn't numeric.
     *
     * "Numeric" is decided by "is_numeric()"; the value is then converted with an (int) cast.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
13338
13339
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and do not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // strings and numbers can be cast directly
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects only if they know how to represent themselves as string
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        return null;
    }
13384
13385
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * If mbstring support is flagged in "self::$SUPPORT" the replacement is done with
     * "mb_ereg_replace()", otherwise with "UTF8::regex_replace()".
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list would produce the character class "[]", which is
        // either invalid or swallows parts of the pattern. Like the native
        // "trim()" we strip nothing in this case.
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // "mb_ereg_replace()" patterns have no delimiter, only the fallback needs "/" quoted
            /** @noinspection PregQuoteUsageInspection */
            $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

            // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
13431
13432
    /**
     * Makes string's first char uppercase.
     *
     * Without a language and without the "keep length" option the first char is
     * uppercased by "mb_strtoupper()", otherwise by "UTF8::strtoupper()".
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            if ($use_mb_functions) {
                return \mb_strtoupper($first_char) . $remainder;
            }

            return self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($use_mb_functions) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first_char, $encoding) . $remainder;
        }

        $first_char = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $remainder;
    }
13508
13509
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::ucfirst()".
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Passed on as the second argument of "UTF8::ucfirst()".</p>
     * @param bool   $clean_utf8 [optional] <p>Passed on as the third argument of "UTF8::ucfirst()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13530
13531
    /**
     * Uppercase for all words in the string.
     *
     * Pure ASCII input without exceptions and without an extra char list is handled
     * by the native "ucwords()"; everything else is split via "UTF8::str_to_words()"
     * and each word is passed to "UTF8::ucfirst()".
     *
     * INFO: the string "0" is a regular value here - it is neither treated as an
     * empty input nor dropped when it occurs as a word, as an exception or as char list.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: "!$str" would also reject the valid input "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // strict comparison: a char list or an exception of "0" must not be ignored
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty chunks only, a word "0" must be kept
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
13599
13600
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * One decoding pass runs the string through "UTF8::to_utf8()",
     * "UTF8::html_entity_decode()", "urldecode()" and "UTF8::fix_simple_utf8()";
     * with $multi_decode the pass is repeated until the string stops changing.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to decode
        $has_encoded_parts = \strpos($str, '&') !== false
                             || \strpos($str, '%') !== false
                             || \strpos($str, '+') !== false
                             || \strpos($str, '\u') !== false;

        if (!$has_encoded_parts) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // always decode once, keep going only in "multi decode" mode while the string still changes
        do {
            $str_before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $str_before_pass !== $str);

        return $str;
    }
13675
13676
    /**
13677
     * Return a array with "urlencoded"-win1252 -> UTF-8
13678
     *
13679
     * @psalm-pure
13680
     *
13681
     * @return string[]
13682
     *
13683
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
13684
     */
13685 2
    public static function urldecode_fix_win1252_chars(): array
13686
    {
13687
        return [
13688 2
            '%20' => ' ',
13689
            '%21' => '!',
13690
            '%22' => '"',
13691
            '%23' => '#',
13692
            '%24' => '$',
13693
            '%25' => '%',
13694
            '%26' => '&',
13695
            '%27' => "'",
13696
            '%28' => '(',
13697
            '%29' => ')',
13698
            '%2A' => '*',
13699
            '%2B' => '+',
13700
            '%2C' => ',',
13701
            '%2D' => '-',
13702
            '%2E' => '.',
13703
            '%2F' => '/',
13704
            '%30' => '0',
13705
            '%31' => '1',
13706
            '%32' => '2',
13707
            '%33' => '3',
13708
            '%34' => '4',
13709
            '%35' => '5',
13710
            '%36' => '6',
13711
            '%37' => '7',
13712
            '%38' => '8',
13713
            '%39' => '9',
13714
            '%3A' => ':',
13715
            '%3B' => ';',
13716
            '%3C' => '<',
13717
            '%3D' => '=',
13718
            '%3E' => '>',
13719
            '%3F' => '?',
13720
            '%40' => '@',
13721
            '%41' => 'A',
13722
            '%42' => 'B',
13723
            '%43' => 'C',
13724
            '%44' => 'D',
13725
            '%45' => 'E',
13726
            '%46' => 'F',
13727
            '%47' => 'G',
13728
            '%48' => 'H',
13729
            '%49' => 'I',
13730
            '%4A' => 'J',
13731
            '%4B' => 'K',
13732
            '%4C' => 'L',
13733
            '%4D' => 'M',
13734
            '%4E' => 'N',
13735
            '%4F' => 'O',
13736
            '%50' => 'P',
13737
            '%51' => 'Q',
13738
            '%52' => 'R',
13739
            '%53' => 'S',
13740
            '%54' => 'T',
13741
            '%55' => 'U',
13742
            '%56' => 'V',
13743
            '%57' => 'W',
13744
            '%58' => 'X',
13745
            '%59' => 'Y',
13746
            '%5A' => 'Z',
13747
            '%5B' => '[',
13748
            '%5C' => '\\',
13749
            '%5D' => ']',
13750
            '%5E' => '^',
13751
            '%5F' => '_',
13752
            '%60' => '`',
13753
            '%61' => 'a',
13754
            '%62' => 'b',
13755
            '%63' => 'c',
13756
            '%64' => 'd',
13757
            '%65' => 'e',
13758
            '%66' => 'f',
13759
            '%67' => 'g',
13760
            '%68' => 'h',
13761
            '%69' => 'i',
13762
            '%6A' => 'j',
13763
            '%6B' => 'k',
13764
            '%6C' => 'l',
13765
            '%6D' => 'm',
13766
            '%6E' => 'n',
13767
            '%6F' => 'o',
13768
            '%70' => 'p',
13769
            '%71' => 'q',
13770
            '%72' => 'r',
13771
            '%73' => 's',
13772
            '%74' => 't',
13773
            '%75' => 'u',
13774
            '%76' => 'v',
13775
            '%77' => 'w',
13776
            '%78' => 'x',
13777
            '%79' => 'y',
13778
            '%7A' => 'z',
13779
            '%7B' => '{',
13780
            '%7C' => '|',
13781
            '%7D' => '}',
13782
            '%7E' => '~',
13783
            '%7F' => '',
13784
            '%80' => '`',
13785
            '%81' => '',
13786
            '%82' => '‚',
13787
            '%83' => 'ƒ',
13788
            '%84' => '„',
13789
            '%85' => '…',
13790
            '%86' => '†',
13791
            '%87' => '‡',
13792
            '%88' => 'ˆ',
13793
            '%89' => '‰',
13794
            '%8A' => 'Š',
13795
            '%8B' => '‹',
13796
            '%8C' => 'Œ',
13797
            '%8D' => '',
13798
            '%8E' => 'Ž',
13799
            '%8F' => '',
13800
            '%90' => '',
13801
            '%91' => '‘',
13802
            '%92' => '’',
13803
            '%93' => '“',
13804
            '%94' => '”',
13805
            '%95' => '•',
13806
            '%96' => '–',
13807
            '%97' => '—',
13808
            '%98' => '˜',
13809
            '%99' => '™',
13810
            '%9A' => 'š',
13811
            '%9B' => '›',
13812
            '%9C' => 'œ',
13813
            '%9D' => '',
13814
            '%9E' => 'ž',
13815
            '%9F' => 'Ÿ',
13816
            '%A0' => '',
13817
            '%A1' => '¡',
13818
            '%A2' => '¢',
13819
            '%A3' => '£',
13820
            '%A4' => '¤',
13821
            '%A5' => '¥',
13822
            '%A6' => '¦',
13823
            '%A7' => '§',
13824
            '%A8' => '¨',
13825
            '%A9' => '©',
13826
            '%AA' => 'ª',
13827
            '%AB' => '«',
13828
            '%AC' => '¬',
13829
            '%AD' => '',
13830
            '%AE' => '®',
13831
            '%AF' => '¯',
13832
            '%B0' => '°',
13833
            '%B1' => '±',
13834
            '%B2' => '²',
13835
            '%B3' => '³',
13836
            '%B4' => '´',
13837
            '%B5' => 'µ',
13838
            '%B6' => '¶',
13839
            '%B7' => '·',
13840
            '%B8' => '¸',
13841
            '%B9' => '¹',
13842
            '%BA' => 'º',
13843
            '%BB' => '»',
13844
            '%BC' => '¼',
13845
            '%BD' => '½',
13846
            '%BE' => '¾',
13847
            '%BF' => '¿',
13848
            '%C0' => 'À',
13849
            '%C1' => 'Á',
13850
            '%C2' => 'Â',
13851
            '%C3' => 'Ã',
13852
            '%C4' => 'Ä',
13853
            '%C5' => 'Å',
13854
            '%C6' => 'Æ',
13855
            '%C7' => 'Ç',
13856
            '%C8' => 'È',
13857
            '%C9' => 'É',
13858
            '%CA' => 'Ê',
13859
            '%CB' => 'Ë',
13860
            '%CC' => 'Ì',
13861
            '%CD' => 'Í',
13862
            '%CE' => 'Î',
13863
            '%CF' => 'Ï',
13864
            '%D0' => 'Ð',
13865
            '%D1' => 'Ñ',
13866
            '%D2' => 'Ò',
13867
            '%D3' => 'Ó',
13868
            '%D4' => 'Ô',
13869
            '%D5' => 'Õ',
13870
            '%D6' => 'Ö',
13871
            '%D7' => '×',
13872
            '%D8' => 'Ø',
13873
            '%D9' => 'Ù',
13874
            '%DA' => 'Ú',
13875
            '%DB' => 'Û',
13876
            '%DC' => 'Ü',
13877
            '%DD' => 'Ý',
13878
            '%DE' => 'Þ',
13879
            '%DF' => 'ß',
13880
            '%E0' => 'à',
13881
            '%E1' => 'á',
13882
            '%E2' => 'â',
13883
            '%E3' => 'ã',
13884
            '%E4' => 'ä',
13885
            '%E5' => 'å',
13886
            '%E6' => 'æ',
13887
            '%E7' => 'ç',
13888
            '%E8' => 'è',
13889
            '%E9' => 'é',
13890
            '%EA' => 'ê',
13891
            '%EB' => 'ë',
13892
            '%EC' => 'ì',
13893
            '%ED' => 'í',
13894
            '%EE' => 'î',
13895
            '%EF' => 'ï',
13896
            '%F0' => 'ð',
13897
            '%F1' => 'ñ',
13898
            '%F2' => 'ò',
13899
            '%F3' => 'ó',
13900
            '%F4' => 'ô',
13901
            '%F5' => 'õ',
13902
            '%F6' => 'ö',
13903
            '%F7' => '÷',
13904
            '%F8' => 'ø',
13905
            '%F9' => 'ù',
13906
            '%FA' => 'ú',
13907
            '%FB' => 'û',
13908
            '%FC' => 'ü',
13909
            '%FD' => 'ý',
13910
            '%FE' => 'þ',
13911
            '%FF' => 'ÿ',
13912
        ];
13913
    }
13914
13915
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * The input is processed byte-wise: a 2-byte sequence whose code point is below 256
     * becomes a single byte, every other multi-byte sequence (and a 2-byte sequence that
     * is cut off at the end of the input) is replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the untouched input is returned whenever the decoded result
     *                                is not shorter (measured with UTF8::strlen()) than the input.
     *                                </p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables used below
        // NOTE(review): presumably "ord" maps a 1-byte string to its integer value and "chr" the reverse - confirm in /data
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        // $i reads the input bytes, $j writes the (never longer) output into the same string
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte selects the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence: there is no continuation byte to read
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, the rest is handled like a 3-byte sequence
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte sequence: cannot be represented in ISO-8859-1
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // single byte: copy as-is
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes were (re)written, drop the stale tail
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13991
13992
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around PHP's "utf8_encode()": an empty input or a failed conversion
     * (the call returned false) both yield an empty string.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
14020
14021
    /**
     * Deprecated alias: forwards the input to "UTF8::fix_simple_utf8()" and returns its result
     * (originally described as: fix -> utf8-win1252 chars).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        return self::fix_simple_utf8($str);
    }
14036
14037
    /**
     * Returns the table of utf8 whitespace characters held in self::$WHITESPACE_TABLE.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  Whitespace characters as values, keyed by the type of whitespace
     *                  (as defined in the URL above)
     */
    public static function whitespace_table(): array
    {
        return self::$WHITESPACE_TABLE;
    }
14052
14053
    /**
     * Limit the number of words in a string.
     *
     * Returns '' for an empty string or a limit below 1. If the string has no more than
     * $limit words (or the pattern does not match at all) it is returned unchanged;
     * otherwise the first $limit words are returned, right-trimmed, followed by $str_add_on.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, then between 1 and $limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        $head = $found[0] ?? null;
        if ($head === null) {
            return $str;
        }

        // the match already covers the whole string: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
14087
14088
    /**
     * Wraps a string to a given number of characters
     *
     * The string is split into characters and mirrored by an ASCII mask of the same length;
     * PHP's byte-based "wordwrap()" runs on that mask and the break positions it yields are
     * then applied to the real characters. Returns '' if $str or $break is empty.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars: the real characters (an existing $break counts as one entry)
        // $mask:  one ASCII byte per entry - '#' for a break, ' ' for a space, '?' for anything else
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        // let the native function decide where the breaks go
        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_index = -1;
        $marker_pos = -1;
        while (true) {
            $marker_pos = \mb_strpos($mask, '#', $marker_pos + 1);
            if ($marker_pos === false) {
                break;
            }

            // copy every character that sits before the current break marker
            for (++$mask_index; $mask_index < $marker_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced an existing break or a space: consume that character
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($marker_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed belongs to the last line
        return $wrapped . \implode('', $chars);
    }
14182
14183
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * Every part is wrapped on its own via "UTF8::wordwrap()" and the parts are joined again
     * with $delimiter. Without a delimiter the input is split on "\r\n", "\r" or "\n" and
     * joined with "\n".
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $tail = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $tail;
    }
14236
14237
    /**
     * Returns the Unicode White Space characters held in self::$WHITESPACE.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>White Space Characters as values, keyed by their numeric code point.</p>
     */
    public static function ws(): array
    {
        return self::$WHITESPACE;
    }
14249
14250
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * With PCRE UTF-8 support the check is delegated to a "/u" regex; otherwise the bytes are
     * validated one by one (lead octet, continuation octets, shortest form, surrogates, range).
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary data that is detected as UTF-16 / UTF-32 is never reported as UTF-8
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // one row per kind of multi-octet lead byte:
        // [mask, expected bits after masking, payload mask, length of the sequence]
        //
        // 5 and 6 octet sequences are illegal (not the shortest form or outside the Unicode
        // range 0-0x10FFFF). Rather than trying to resynchronize, they are read until the end
        // of the sequence and rejected by the checks further below.
        $lead_octets = [
            [0xE0, 0xC0, 0x1F, 2],
            [0xF0, 0xE0, 0x0F, 3],
            [0xF8, 0xF0, 0x07, 4],
            [0xFC, 0xF8, 0x03, 5],
            [0xFE, 0xFC, 0x01, 6],
        ];

        // From Unicode 3.1, non-shortest form is illegal: smallest code point per sequence length
        $shortest_form_minimum = [2 => 0x0080, 3 => 0x0800, 4 => 0x10000];

        $pending = 0; // number of continuation octets still expected
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // number of octets in the current sequence

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the first octet of a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;

                    continue;
                }

                $is_lead_octet = false;
                foreach ($lead_octets as $lead) {
                    if (($lead[0] & $octet) === $lead[1]) {
                        $sequence_length = $lead[3];
                        $pending = $sequence_length - 1;
                        $code_point = ($octet & $lead[2]) << ($pending * 6);
                        $is_lead_octet = true;

                        break;
                    }
                }

                if (!$is_lead_octet) {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a multi-octet sequence: only a continuation octet (10xxxxxx) is legal.
            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);
            if (--$pending !== 0) {
                continue;
            }

            // End of the multi-octet sequence: $code_point now holds the final code point.
            if (
                $sequence_length > 4
                ||
                $code_point < $shortest_form_minimum[$sequence_length]
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                ($code_point & 0xFFFFF800) === 0xD800
                ||
                // Code points outside the Unicode range are illegal.
                $code_point > 0x10FFFF
            ) {
                return false;
            }

            // reset for the next character
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
14405
14406
    /**
     * Replaces the characters of one case-folding table with their counterparts of the other:
     * first with the "common" table (self::$COMMON_CASE_FOLD), then - if requested - with the
     * "full" table loaded from the "caseFolding_full" data file.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        // pick the direction once: lowercase means "upper -> lower", otherwise "lower -> upper"
        $search = $use_lowercase ? self::$COMMON_CASE_FOLD['upper'] : self::$COMMON_CASE_FOLD['lower'];
        $replace = $use_lowercase ? self::$COMMON_CASE_FOLD['lower'] : self::$COMMON_CASE_FOLD['upper'];

        $str = \str_replace($search, $replace, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            // loaded once per request, then kept in the static variable
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 for lowercase, the other way around for uppercase
        $from = $use_lowercase ? 0 : 1;
        $to = 1 - $from;

        return \str_replace($FULL_CASE_FOLD[$from], $FULL_CASE_FOLD[$to], $str);
    }
14459
14460
    /**
     * get data from "/data/*.php"
     *
     * Includes "<this directory>/data/<$file>.php" and hands back whatever that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
14478
14479
    /**
     * Fills the static emoji caches on the first call and does nothing afterwards.
     *
     * The emoji table is sorted by key length (longest key first); its keys, the table itself
     * and one generated placeholder string per key are then stored in the caches.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                // descending by byte length
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            // placeholder: the crc32 of the key, followed by the same digits reversed
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14514
14515
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * Reports false when the constant "MB_OVERLOAD_STRING" does not exist; otherwise the
     * "mbstring.func_overload" INI value is tested against that flag.
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($func_overload & \MB_OVERLOAD_STRING);
    }
14536
14537
    /**
     * Filters a list of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>If true, values that are empty after "trim()" are dropped.</p>
     * @param int|null $remove_short_values <p>
     *                                      If not null, values whose length (via "mb_strlen()") is
     *                                      less than or equal to this number are dropped.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: nothing is modified in place, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14578
14579
    /**
     * rxClass
     *
     * Builds a regex fragment that matches any single character of $s: the characters are
     * collected into one character class "[...]" that starts with the raw content of $class.
     * Results are cached per ($s, $class) combination for the lifetime of the request.
     *
     * @param string $s     <p>The characters the fragment should match.</p>
     * @param string $class <p>Raw content that is put at the start of the character class.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 holds the content of the character class, further entries are alternatives
        /** @var string[] $class_array */
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading "-" is a literal inside a character class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape regex meta characters
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character of three or more bytes: cannot be a meta character
                $class_array[0] .= $char;
            } else {
                // more than one character: cannot live inside "[...]", use an alternative
                $class_array[] = $char;
            }
        }

        // compare against '' explicitly: a plain truthiness test would skip the class "0"
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14637
14638
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter, upper-cases the first character of every
     * part via self::ucfirst() and joins the parts again. A part is left
     * untouched when it
     * - is exactly one of the lower-case name particles ("van", "de", ...), or
     * - starts with one of the known prefixes ("mc", "d'", ...), or
     * - starts with one of the name particles while the delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>Separator between the parts; an empty delimiter yields an empty string.</p>
     * @param string $encoding  <p>Not used by the current implementation.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() rejects an empty delimiter (warning + false before PHP 8,
        // \ValueError since PHP 8). Return the empty result up front so both
        // runtimes behave the same.
        if ($delimiter === '') {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part lower-case. The name particles only
        // count as beginnings when splitting at "-". The list does not depend
        // on the individual part, so build it once.
        $kept_beginnings = $delimiter === '-'
            ? \array_merge($special_cases['names'], $special_cases['prefixes'])
            : $special_cases['prefixes'];

        $parts = \explode($delimiter, $names);

        foreach ($parts as $index => $part) {
            if (\in_array($part, $special_cases['names'], true)) {
                continue;
            }

            foreach ($kept_beginnings as $beginning) {
                if (\strncmp($part, $beginning, \strlen($beginning)) === 0) {
                    // Known beginning: leave this part exactly as typed.
                    continue 2;
                }
            }

            // NOTE(review): $encoding is not forwarded here, matching the
            // previous behavior - confirm whether self::ucfirst() should receive it.
            $parts[$index] = self::ucfirst($part);
        }

        return \implode($delimiter, $parts);
    }
14738
14739
    /**
     * Folds a string for collation matching: the input is decomposed (NFD)
     * and every run of non-spacing marks (\p{Mn}) is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, an empty string when the normalization fails,
     *                     or null when preg_replace() reports an error.</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        /** @phpstan-ignore-next-line - https://github.com/JetBrains/phpstorm-stubs/pull/949 */
        if ($decomposed !== false) {
            // Strip the marks that the decomposition separated from their base characters.
            /** @noinspection PhpUndefinedClassInspection */
            return \preg_replace('/\p{Mn}+/u', '', $decomposed);
        }

        // The input could not be normalized.
        return '';
    }
14763
14764
    /**
     * Converts a single input unit into its UTF-8 byte sequence.
     *
     * The ordinal of $input is looked up in the ORD table. If the
     * WIN1252_TO_UTF8 table has an entry for that ordinal, this entry is
     * returned. Otherwise a two-byte sequence is built: a lead byte from the
     * upper bits of the ordinal (OR-ed with 0xC0) and a continuation byte
     * from the lower six bits of the input (OR-ed with 0x80).
     *
     * @param int|string $input <p>Key into the ORD table (presumably a single byte - confirm against the callers).</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // Load the lookup tables on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Truncate explicitly: "/" yields a float, and using a float with a
        // fractional part as an array offset is deprecated since PHP 8.1.
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
        // String operands on purpose: "&" and "|" work byte-wise here.
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
14803
14804
    /**
     * Rewrites "%uXXXX" escapes (three or four hex digits) into the HTML
     * numeric character reference "&#xXXXX;". Input without such an escape
     * is returned unchanged.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // Nothing to rewrite: hand the input back as it is.
        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
14822
}
14823