Passed
Push — master ( eb7992...d50de1 )
by Lars
03:20
created

UTF8::str_insert()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 28
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 4.0047

Importance

Changes 0
Metric Value
cc 4
eloc 14
nc 4
nop 4
dl 0
loc 28
ccs 14
cts 15
cp 0.9333
crap 4.0047
rs 9.7998
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a workaround for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
     * Byte Order Mark (raw byte sequence) => length of that sequence in bytes.
     *
     * Every BOM is listed twice: once as its raw bytes and once as the bytes
     * you get when the raw BOM was misread as "WINDOWS-1252" and re-encoded
     * as UTF-8 (every byte >= 0x80 becomes two bytes, NUL bytes stay one byte).
     *
     * All keys are written as escape sequences so that NUL bytes and
     * non-ASCII bytes cannot be lost or altered by editors, copy & paste
     * or file re-encoding. The stored length always equals the key's byte length.
     *
     * The longer UTF-32 entries are listed before the UTF-16 entries that
     * are a prefix of them; keep this order.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @psalm-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @psalm-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @psalm-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @psalm-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @psalm-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @psalm-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @psalm-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @psalm-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @psalm-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @psalm-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @psalm-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @psalm-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
     * Intentionally empty constructor.
     *
     * The visible members of this class are static, so an instance
     * carries no state of its own.
     */
    public function __construct()
    {
        // nothing to initialize
    }
252
253
    /**
     * Return the character at the given position, similar to $str[1] but
     * counted in characters instead of bytes.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string if the input
     *                is empty or the position is negative.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // unlike char_at(), negative positions are rejected here
        if ($pos < 0 || $str === '') {
            return '';
        }

        // fast path for UTF-8, generic helper for every other encoding
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
279
280
    /**
     * Prepends the UTF-8 BOM to the string and returns the whole string.
     *
     * INFO: If the string already has a BOM (as reported by
     *       UTF8::string_has_bom()), the input string is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
302
303
    /**
     * Changes the case of all keys in an array.
     *
     * Values are copied over unchanged. If two keys become equal after the
     * case conversion, the value of the later key overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default).
     *                                       Any other value is treated as <strong>CASE_LOWER</strong>.</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array
     *               <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // fall back to the default for unknown case flags
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: the values are only read, so a reference is not
        // needed and would be left dangling after the loop
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Returns the substring between $start and $end, if found, or an empty
     * string. An optional offset may be supplied from which to begin the
     * search for the start string.
     *
     * An empty string is also returned when $end directly follows $start,
     * i.e. when there is nothing in between.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: go through the encoding aware helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $from = self::strpos($str, $start, $offset, $encoding);
            if ($from === false) {
                return '';
            }

            // the wanted part begins right after the start delimiter
            $from += (int) self::strlen($start, $encoding);
            $to = self::strpos($str, $end, $from, $encoding);
            if ($to === false || $to === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $to - $from, $encoding);
        }

        // fast path: plain "mb_" functions for UTF-8
        $from = \mb_strpos($str, $start, $offset);
        if ($from === false) {
            return '';
        }

        // the wanted part begins right after the start delimiter
        $from += (int) \mb_strlen($start);
        $to = \mb_strpos($str, $end, $from);
        if ($to === false || $to === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $to - $from);
    }
406
407
    /**
     * Convert a string of binary digits into the string those bits encode.
     *
     * The input is read as one big-endian bit sequence. Leading zero bits are
     * ignored (so an input of only zeros yields an empty string) and the rest
     * is left-padded to full bytes, e.g. '1010' is handled like '00001010'.
     *
     * The bits are converted byte by byte, so inputs of any length are
     * handled exactly.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes, or an empty string for empty / all-zero input.</p>
     */
    public static function binary_to_str($bin): string
    {
        // also covers non-string input, where no offset 0 exists
        if (!isset($bin[0])) {
            return '';
        }

        // leading zeros carry no value (same as in a numeric conversion)
        $bits = \ltrim((string) $bin, '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8 so that every chunk is one full byte;
        // converting through a single hex number would misplace the padding
        // for odd nibble counts and lose precision for long inputs
        $length = \strlen($bits);
        $bits = \str_pad($bits, $length + ((8 - $length % 8) % 8), '0', \STR_PAD_LEFT);

        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
433
434
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at the private UTF8::$BOM table for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        // the three bytes EF BB BF
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
450
451
    /**
     * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>The string to run the callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>Whatever UTF8::chr_map() returns for the same arguments.</p>
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
467
468
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * The index is handed over to the substring function as it is, so
     * (unlike in UTF8::access()) negative values are not rejected here.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // fast path for UTF-8, generic helper for every other encoding
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
488
489
    /**
     * Returns an array consisting of the characters in the string.
     *
     * This is UTF8::str_split() called with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
504
505
    /**
     * This method will auto-detect your server environment for UTF-8 support.
     *
     * The results are stored in self::$SUPPORT. The detection runs only once:
     * the first call returns true, every later call returns null and does nothing.
     *
     * @return true|null
     *                   <p>true if the detection was executed by this call, otherwise null.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // already detected by an earlier call
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch the "mb_" functions to UTF-8 and remember that we did it
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // the polyfill provides "mb_internal_encoding" as well
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
560
561
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * Results are cached per code point and encoding in a static variable.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null if the code point is zero or negative.</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only U+0001 - U+007F are a single byte in UTF-8. The lookup table
        // holds raw bytes (it is used below to build lead and continuation
        // bytes), so U+0080 must not be taken from it directly: that would
        // return the lone byte "\x80", which is not valid UTF-8.
        if ($code_point < 0x80) { // only for "simple"-chars

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $code_point = (int) $code_point;
        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
686
687
    /**
     * Applies callback to all characters of a string.
     *
     * The string is split via UTF8::str_split() and the callback is called
     * once per resulting element.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
707
708
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // pick the byte counting function first, then apply it to every character
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byte_length = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, self::str_split($str));
    }
743
744
    /**
     * Get a decimal code representation of a specific character.
     *
     * With "iconv" support the character is converted to UCS-4 and read as
     * one 32-bit value; otherwise the UTF-8 bytes are decoded by hand.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the character, 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first byte to decode: unpacking the empty
        // conversion result below would not yield an int, and the manual
        // decoding would read a non-existent offset.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                // "V" = unsigned 32-bit, little endian
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells the sequence length; strip its marker bits
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the payload bits of every following byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
796
797
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Passed on to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity of the NULL byte is handed to UTF8::ord() as an empty string
        $normalized = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($normalized), $prefix);
    }
822
823
    /**
     * Alias of UTF8::chr_to_decimal(): the argument is passed through unchanged.
     *
     * @param string $chr <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Whatever UTF8::chr_to_decimal() returns for the same argument.</p>
     *
     * @see        UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
839
840
    /**
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
     *
     * The chunks come from UTF8::str_split() and are joined with $end, so
     * $end is placed between the chunks only and not after the last one.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
858
859
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in this fixed order: diamond question mark,
     * invisible characters, whitespace, MS Word chars, BOM.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Passed on to
     *                                                        UTF8::remove_invisible_characters(); presumably "true"
     *                                                        also removes invisible url encoded characters
     *                                                        e.g.: "%0B" (TODO confirm against that method)<br>
     *                                                        WARNING: maybe contains false-positives
     *                                                        e.g. aa%0Baa -> aaaa.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 matches runs of well-formed byte sequences, groups 2 and 3
        // match single stray bytes. Every match is replaced by group 1, so
        // the well-formed runs are kept and the stray bytes are dropped.
        $utf8_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_pattern, '$1', $str);

        // optional post-processing, the order of the steps is significant
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
941
942
    /**
943
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
944
     *
945
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
946
     *
947
     * @param string $str <p>The input string.</p>
948
     *
949
     * @psalm-pure
950
     *
951
     * @return string
952
     */
953 33
    public static function cleanup($str): string
    {
        // the parameter is untyped, so coerce it to a string first
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // 1. repair ISO <-> UTF-8 errors
        // 2. hand over to clean(), which always strips non UTF-8 bytes and is
        //    asked here for the extra passes named next to each argument
        //    (invisible characters such as "\0" are removed by its default)
        return self::clean(
            self::fix_simple_utf8($input),
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark (�)
        );
    }
979
980
    /**
981
     * Accepts a string or an array of strings and returns an array of Unicode code points.
982
     *
983
     * INFO: opposite to UTF8::string()
984
     *
985
     * EXAMPLE: <code>
986
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
987
     * // ... OR ...
988
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
989
     * </code>
990
     *
991
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
992
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
993
     *                                     default, code points will be returned as integers.</p>
994
     *
995
     * @psalm-pure
996
     *
997
     * @return int[]|string[]
998
     *                        <p>
999
     *                        The array of code points:<br>
1000
     *                        int[] for $use_u_style === false<br>
1001
     *                        string[] for $use_u_style === true<br>
1002
     *                        </p>
1003
     */
1004 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // a plain string is first broken up into its characters
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            // neither a string nor an array, or nothing to convert
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);

        if (!$use_u_style) {
            return $code_points;
        }

        // convert every code point with int_to_hex() for the "U+xxxx" style
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1041
1042
    /**
1043
     * Trims the string and replaces consecutive whitespace characters with a
1044
     * single space. This includes tabs and newline characters, as well as
1045
     * multibyte whitespace such as the thin space and ideographic space.
1046
     *
1047
     * @param string $str <p>The input string.</p>
1048
     *
1049
     * @psalm-pure
1050
     *
1051
     * @return string
1052
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1053
     */
1054 13
    public static function collapse_whitespace(string $str): string
    {
        // every run of whitespace becomes one plain space, then the ends are trimmed
        $whitespace_run_rx = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring available: use the class' own regex helper
            return \trim(self::regex_replace($str, $whitespace_run_rx, ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \trim((string) \mb_ereg_replace($whitespace_run_rx, ' ', $str));
    }
1063
1064
    /**
1065
     * Returns count of characters used in a string.
1066
     *
1067
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1068
     *
1069
     * @param string $str                     <p>The input string.</p>
1070
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1071
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_*" functions.</p>
1072
     *
1073
     * @psalm-pure
1074
     *
1075
     * @return int[]
1076
     *               <p>An associative array of Character as keys and
1077
     *               their count as values.</p>
1078
     */
1079 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // split into chunks of one character each ...
        $single_chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... and tally how often every character occurs
        return \array_count_values($single_chars);
    }
1093
1094
    /**
1095
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1096
     *
1097
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1098
     *
1099
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1100
     *
1101
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1102
     * @param string[] $filter
1103
     * @param bool     $strip_tags
1104
     * @param bool     $strtolower
1105
     *
1106
     * @psalm-pure
1107
     *
1108
     * @return string
1109
     *
1110
     * @psalm-param array<string,string> $filter
1111
     */
1112 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated identifier,
        // everything else is cleaned first.
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // str_replace() is used instead of strtr() because strtr() is much
        // slower. To keep '__' as '__', it is parked behind the placeholder
        // '##' while the filter runs, unless '__' is itself a filter key.
        $placeholder_hits = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $placeholder_hits);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Restore '__' only if the placeholder was actually inserted above.
        if ($placeholder_hits > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Characters kept by the pattern below (everything else is stripped):
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters from U+00A1 up to U+FFFF
        $identifier = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $identifier = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $identifier);

        return \trim($identifier, '-');
    }
1169
1170
    /**
1171
     * Remove css media-queries.
1172
     *
1173
     * @param string $str
1174
     *
1175
     * @psalm-pure
1176
     *
1177
     * @return string
1178
     */
1179 1
    public static function css_stripe_media_queries(string $str): string
    {
        // modifiers: i = caseless, s = dot matches newlines, u = UTF-8 mode,
        //            m = multiline, U = quantifiers are ungreedy by default
        $media_block_rx = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        // drop every matched "@media ... { ... }" block
        return (string) \preg_replace($media_block_rx, '', $str);
    }
1187
1188
    /**
1189
     * Checks whether ctype is available on the server.
1190
     *
1191
     * @psalm-pure
1192
     *
1193
     * @return bool
1194
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1195
     */
1196
    public static function ctype_loaded(): bool
    {
        // ask the engine whether the "ctype" extension is loaded
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1200
1201
    /**
1202
     * Converts an int value into a UTF-8 character.
1203
     *
1204
     * INFO: opposite to UTF8::string()
1205
     *
1206
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1207
     *
1208
     * @param int|string $int
1209
     *
1210
     * @psalm-param int|numeric-string $int
1211
     *
1212
     * @psalm-pure
1213
     *
1214
     * @return string
1215
     */
1216 20
    public static function decimal_to_chr($int): string
    {
        // build the numeric character reference (e.g. "&#931;") ...
        $numeric_entity = '&#' . $int . ';';

        // ... and let the entity decoder turn it into the character
        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1220
1221
    /**
1222
     * Decodes a MIME header field
1223
     *
1224
     * @param string $str
1225
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1226
     *
1227
     * @psalm-pure
1228
     *
1229
     * @return false|string
1230
     *                      <p>A decoded MIME field on success,
1231
     *                      or false if an error occurs during the decoding.</p>
1232
     */
1233 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are passed through as given, any other name is normalized
        $is_known_spelling = $encoding === 'UTF-8' || $encoding === 'CP850';
        $charset = $is_known_spelling
            ? $encoding
            : self::normalize_encoding($encoding, 'UTF-8');

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $charset);
    }
1242
1243
    /**
1244
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1245
     *
1246
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1247
     *
1248
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1249
     *
1250
     * @return string
1251
     *                <p>Emoji or empty string on error.</p>
1252
     */
1253 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // A flag emoji is a pair of regional-indicator symbols, one per ASCII
        // letter of the country code. Anything that is not exactly two ASCII
        // letters is an error and yields the documented empty string.
        // The bytes are indexed below, so a character-count check alone would
        // let two multibyte characters or digits through and produce garbage.
        if (\preg_match('/\A[a-zA-Z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x41;   // "A"

        // Input is validated as ASCII, so the byte value is the code point.
        return (self::chr(\ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset) ?? '') .
               (self::chr(\ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset) ?? '');
    }
1271
1272
    /**
1273
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1274
     *
1275
     * INFO: opposite to UTF8::emoji_encode()
1276
     *
1277
     * EXAMPLE: <code>
1278
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1279
     * //
1280
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1281
     * </code>
1282
     *
1283
     * @param string $str                            <p>The input string.</p>
1284
     * @param bool   $use_reversible_string_mappings [optional] <p>
1285
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1286
     *                                               between "emoji_encode" and "emoji_decode".</p>
1287
     *
1288
     * @psalm-pure
1289
     *
1290
     * @return string
1291
     */
1292 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // make sure the emoji lookup tables are initialized
        self::initEmojiData();

        // pick the key table that matches the encoding variant to undo
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1312
1313
    /**
1314
     * Encode a string with emoji chars into a non-emoji string.
1315
     *
1316
     * INFO: opposite to UTF8::emoji_decode()
1317
     *
1318
     * EXAMPLE: <code>
1319
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
1320
     * //
1321
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1322
     * </code>
1323
     *
1324
     * @param string $str                            <p>The input string</p>
1325
     * @param bool   $use_reversible_string_mappings [optional] <p>
1326
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1327
     *                                               between "emoji_encode" and "emoji_decode"</p>
1328
     *
1329
     * @psalm-pure
1330
     *
1331
     * @return string
1332
     */
1333 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // make sure the emoji lookup tables are initialized
        self::initEmojiData();

        // pick the key table the emoji values are replaced with
        $replacement_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement_keys,
            $str
        );
    }
1353
1354
    /**
1355
     * Encode a string with a new charset-encoding.
1356
     *
1357
     * INFO:  This function will also try to fix broken / double encoding,
1358
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1359
     *
1360
     * EXAMPLE: <code>
1361
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1362
     * //
1363
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1364
     * //
1365
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1366
     * //
1367
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1368
     * </code>
1369
     *
1370
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1371
     * @param string $str                           <p>The input string</p>
1372
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1373
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1374
     *                                              string-encoding</p>
1375
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1376
     *                                              An empty string will trigger the autodetect anyway.</p>
1377
     *
1378
     * @psalm-pure
1379
     *
1380
     * @return string
1381
     *
1382
     * @psalm-suppress InvalidReturnStatement
1383
     */
1384 29
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to convert, or no target encoding given
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given, any other name goes through the normalizer
        $known_spellings = ['UTF-8', 'CP850'];

        if (!\in_array($to_encoding, $known_spellings, true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, $known_spellings, true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // source and target are known and identical: no work to do
        if ($to_encoding && $from_encoding && $to_encoding === $from_encoding) {
            return $str;
        }

        // ---- pseudo-encodings (JSON, BASE64, HTML-ENTITIES) ----
        // The target/source checks are interleaved on purpose: a source
        // pseudo-encoding is decoded before the next target is applied.
        // After decoding, the source encoding is reset so that it is detected below.

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): strict base64_decode() returns false for invalid input;
            // confirm the helpers called below are meant to receive that value.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // ---- find out the source encoding ----

        $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding === false) {
            if ($auto_detect_the_from_encoding) {
                // fallback for the "autodetect"-mode
                return self::to_utf8($str);
            }
        } else {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        }

        // still unknown, or already in the target encoding
        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // ---- conversions handled by the class' own helpers ----

        $from_is_windows_1252 = $from_encoding === 'WINDOWS-1252';

        if (
            $to_encoding === 'UTF-8'
            &&
            ($from_is_windows_1252 || $from_encoding === 'ISO-8859-1')
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            ($from_is_windows_1252 || $from_encoding === 'UTF-8')
        ) {
            return self::to_iso8859($str);
        }

        // ---- generic conversion: mbstring first, iconv as fallback ----

        $mbstring_support = self::$SUPPORT['mbstring'];

        if (
            $mbstring_support === false
            &&
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if ($mbstring_support === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // a falsy result falls through to iconv()
            if ($converted) {
                \assert(\is_string($converted));

                return $converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $iconv_result = @\iconv($from_encoding, $to_encoding, $str);

        // last resort: hand back the (possibly pre-decoded) input untouched
        return $iconv_result !== false ? $iconv_result : $str;
    }
1532
1533
    /**
1534
     * @param string $str
1535
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1536
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1537
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1538
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1539
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1540
     *
1541
     * @psalm-pure
1542
     *
1543
     * @return false|string
1544
     *                      <p>An encoded MIME field on success,
1545
     *                      or false if an error occurs during the encoding.</p>
1546
     */
1547 1
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as given, any other charset name is normalized
        $from_is_known = $from_charset === 'UTF-8' || $from_charset === 'CP850';
        if (!$from_is_known) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        $to_is_known = $to_charset === 'UTF-8' || $to_charset === 'CP850';
        if (!$to_is_known) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        // $indent is handed to iconv as the maximum line length
        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        // (the field name is left empty, only the value is passed)
        return \iconv_mime_encode('', $str, $preferences);
    }
1576
1577
    /**
1578
     * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
1579
     *
1580
     * @param string   $str                       <p>The input string.</p>
1581
     * @param string   $search                    <p>The searched string.</p>
1582
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1583
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1584
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1585
     *
1586
     * @psalm-pure
1587
     *
1588
     * @return string
1589
     */
1590 1
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // characters stripped from the cut edges of an extract
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // default length: half of the text
        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // Local dispatchers: plain mb_* calls for UTF-8, the class' own
        // encoding-aware helpers for every other charset.
        $is_utf8 = $encoding === 'UTF-8';

        $length_of = static function (string $text) use ($is_utf8, $encoding): int {
            return (int) ($is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        // Position of the first " " or "." in $str at or after $offset.
        // NOTE(review): a needle that is not found is reported as false, and
        // \min() with a false operand ends up as 0 after the cast - confirm
        // that "no boundary" is the intended result when only one of the two
        // characters is missing.
        $first_boundary = static function (int $offset) use ($is_utf8, $encoding, $str): int {
            if ($is_utf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        $cut = static function (int $start, int $chars) use ($is_utf8, $encoding, $str) {
            return $is_utf8
                ? \mb_substr($str, $start, $chars)
                : self::substr($str, $start, $chars, $encoding);
        };

        // ---- no search term: cut at the first boundary after $length ----
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $end = \min($length - 1, $length_of($str));
            }

            $pos = $first_boundary($end);
            if (!$pos) {
                return $str;
            }

            $head = $cut(0, $pos);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---- search term given: try to center the extract around it ----
        $word_position = (int) (
            $is_utf8
                ? \mb_stripos($str, $search)
                : self::stripos($str, $search, 0, $encoding)
        );
        $half_side = (int) ($word_position - $length / 2 + $length_of($search) / 2);

        // start at the last " " or "." in front of the centered window
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $cut(0, $half_side);
            if ($half_text !== false) {
                $pos_start = (int) (
                    $is_utf8
                        ? \max(
                            \mb_strrpos($half_text, ' '),
                            \mb_strrpos($half_text, '.')
                        )
                        : \max(
                            self::strrpos($half_text, ' ', 0, $encoding),
                            self::strrpos($half_text, '.', 0, $encoding)
                        )
                );
            }
        }

        $total_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            // the boundary search may not start behind the end of the text
            $offset = \min($pos_start + $length - 1, $total_length);
            $pos_end = $first_boundary($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no closing boundary: take everything from the start position on
                $tail = $cut($pos_start, $length_of($str));

                return $tail === false
                    ? ''
                    : $replacer_for_skipped_text . \ltrim($tail, $trim_chars);
            }

            $middle = $cut($pos_start, $pos_end);

            return $middle === false
                ? ''
                : $replacer_for_skipped_text . \trim($middle, $trim_chars) . $replacer_for_skipped_text;
        }

        // the term was not found, or it sits near the beginning: cut from the start
        $offset = \min($length - 1, $total_length);
        $pos_end = $first_boundary($offset);
        if (!$pos_end) {
            return $str;
        }

        $head = $cut(0, $pos_end);

        return $head === false
            ? ''
            : \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
    }
1766
1767
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. NUL bytes and tag-like sequences are
     *                                        stripped and quotes are HTML-encoded before the name is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed through to the native file_get_contents() to trigger the
     *                                        include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If you don't need to use a
     *                                        custom context, you can skip this parameter by &null;.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, "null" is handled as "0".
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout. It is only applied (as
     *                                        "http" stream context option) when no $context was given and the
     *                                        value is not "0".</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // "FILTER_SANITIZE_STRING" is deprecated as of PHP 8.1 and triggers an
        // E_DEPRECATED on every call, so the same sanitizing is done by hand:
        // strip NUL bytes + tags, then encode single and double quotes.
        $filename = \preg_replace('/\x00|<[^>]*>?/', '', $filename);
        if ($filename === null) {
            // the regex engine failed, handle it like a failed filter
            return false;
        }
        $filename = \str_replace(["'", '"'], ['&#39;', '&#34;'], $filename);

        // only build a default context if the caller did not provide one
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // the native function does not accept "null" as length on every
        // supported PHP version, so the argument is only passed if it is set
        if (\is_int($max_length)) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8) {
            // convert everything that is not reported as binary, plus data
            // that is reported as UTF-16 / UTF-32
            if (
                !self::is_binary($data, true)
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1869
1870
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * INFO: The whole file is read and then handed over to "UTF8::string_has_bom()".
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $raw = \file_get_contents($file_path);

        if ($raw !== false) {
            return self::string_has_bom($raw);
        }

        // the file could not be read at all
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1893
1894
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively (value by value), strings are
     * normalized and every other type is returned untouched.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var
     * @param int                 $normalization_form <p>One of the "\Normalizer" form constants.</p>
     * @param string              $leading_combining  <p>Prefix that is added if the string starts with a
     *                                                combining mark, an empty string disables the prefix.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @psalm-param TFilter $var
     * @psalm-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // walk over the values by reference, so they are replaced in place
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // pure ASCII input needs no normalization at all
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // non-empty marker: the input was already normalized
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // fall back to a re-encoding if the normalizer gave us nothing usable
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_multi_byte = $var[0] >= "\x80";
                if (
                    $starts_with_multi_byte
                    &&
                    isset($normalized[0], $leading_combining[0])
                    &&
                    \preg_match('/^\\p{Mn}/u', $var)
                ) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @psalm-var TFilter $var */
        $var = $var;

        return $var;
    }
1969
1970
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and
     * passes the result through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      "null" is handled as "not given".
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        // the options are only forwarded if the caller really provided them
        $value = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($value);
    }
2025
2026
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result
     * through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int            $type       <p>
     *                                   One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                   <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                   <b>INPUT_ENV</b>.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a filter type, or
     *                                   an array optionally specifying the filter, flags and options. If the
     *                                   value is an array, valid keys are filter which specifies the filter
     *                                   type, flags which specifies any flags that apply to the filter, and
     *                                   options which specifies any options that apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then
     *                                   all values in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   "null" is handled as "not given", in this case $add_empty is not
     *                                   forwarded either.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        // without a definition the native defaults are used
        $values = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
2087
2088
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter and passes the result through "UTF8::filter()".
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param array|int|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it. "null" is handled as
     *                                        "not given".
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        // other options here
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                        // Expected format: Surname, GivenNames
     *                                        if (strpos($value, ", ") === false) return false;
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
     *                                        $empty = (empty($surname) || empty($givennames));
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                        if ($empty || $notstrings) {
     *                                        return false;
     *                                        } else {
     *                                        return $value;
     *                                        }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // "null" must never reach the native function: its third parameter
            // is "array|int", so with strict types a "null" is a TypeError on PHP 8
            // (same guard as in "UTF8::filter_input()")
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2167
2168
    /**
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables, optionally filters them and passes the result through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'user@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter
     *                                   which specifies the filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   "null" is handled as "not given", so the native default is used.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            // "null" must never reach the native function: its second parameter
            // is "array|int", so with strict types a "null" is a TypeError on PHP 8
            // (same guard as in "UTF8::filter_input_array()")
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2238
2239
    /**
     * Checks whether finfo is available on the server.
     *
     * INFO: This is a simple "class_exists()" check for the "finfo" class.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $is_available = \class_exists('finfo');

        return $is_available;
    }
2251
2252
    /**
     * Returns the first $n characters of the string.
     *
     * INFO: An empty input or a $n that is not positive will result in an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // fast path: use "mb_substr()" directly for UTF-8
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2278
2279
    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
     *
     * EXAMPLE: <code>
     * UTF8::fits_inside('κόσμε', 5); // true
     * UTF8::fits_inside('κόσμε', 4); // false
     * </code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
2296
2297
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * INFO: The replacement is a plain "str_replace()" with the "utf8_fix" data table,
     *       the table is only loaded on the first call with a non-empty string.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            // lazy-load the data table, then remember the broken sequences
            // (keys) and the table itself (replacements) for the next calls
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys($REPLACE_CACHE);
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2347
2348
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
     *
     * INFO: Strings are decoded + re-encoded in a loop until the result does not
     *       change anymore, arrays are processed value by value (recursively).
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // fix every entry in place, the keys are kept as they are
            foreach (\array_keys($str) as $key) {
                $str[$key] = self::fix_utf8($str[$key]);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;
        $previous = '';

        // repeat until another round does not change the string anymore
        // (an empty string never enters the loop)
        while ($previous !== $str) {
            $previous = $str;

            $decoded = self::utf8_decode($str, true);

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2394
2395
    /**
     * Get the text direction of a single character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * @param string $char <p>The character to inspect.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // every other result falls through to the code point tables below
        }

        $code_point = self::chr_to_decimal($char);

        // everything outside of 0x5be - 0x10b7f is reported as "LTR"
        if ($code_point < 0x5be || $code_point > 0x10b7f) {
            return 'LTR';
        }

        if ($code_point <= 0x85e) {
            // lower part of the table: 0x5be - 0x85e
            $rtl_singles = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            $rtl_ranges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];
        } elseif ($code_point === 0x200f) {
            return 'RTL';
        } elseif ($code_point >= 0xfb1d) {
            // upper part of the table: 0xfb1d - 0x10b7f
            $rtl_singles = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            $rtl_ranges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // between both parts of the table (and not 0x200f)
            return 'LTR';
        }

        if (\in_array($code_point, $rtl_singles, true)) {
            return 'RTL';
        }

        foreach ($rtl_ranges as $range) {
            if ($code_point >= $range[0] && $code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2513
2514
    /**
     * Check for php-support.
     *
     * @param string|null $key <p>Name of a single support-entry, or null for the full support-"array".</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the value of the entry, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // the transliterator list is only loaded on the first lookup by key
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2540
2541
    /**
     * Detect the file type of (binary) content via its first two bytes.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The content to inspect.</p>
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with these keys: 'ext', 'mime', 'type'; the $fallback is returned
     *                         if the content is shorter than two bytes or the type is unknown</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // at least two bytes are needed for the comparison
        if (!isset($str[1])) {
            return $fallback;
        }

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results
        //          (e.g. pdf, exe, midi, zip, rar, gif, tiff, bmp, ...)
        //
        $known_signatures = [
            // 255 + 216
            "\xff\xd8" => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            // 137 + 80
            "\x89\x50" => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        $signature = $str[0] . $str[1];

        return $known_signatures[$signature] ?? $fallback;
    }
2622
2623
    /**
     * Create a random string out of the given characters.
     *
     * INFO: "random_int()" is used for the selection, "mt_rand()" is only the fallback
     *       if "random_int()" throws an exception.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if $possible_chars has no characters.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // the plain "mb_*" functions are used for 'UTF-8', the methods of this class for every other encoding
        $use_plain_mb = $encoding === 'UTF-8';

        if ($use_plain_mb) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $random_string = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $position = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $pool_size - 1);
            }

            if ($use_plain_mb) {
                $char = \mb_substr($possible_chars, $position, 1);
            } else {
                $char = self::substr($possible_chars, $position, 1, $encoding);
            }

            // a failed extraction does not count, try again with a new position
            if ($char === false) {
                continue;
            }

            $random_string .= $char;
            ++$picked;
        }

        return $random_string;
    }
2687
2688
    /**
     * Create a unique string via "uniqid()", based on a random number, the session-id,
     * the remote- and server-address (if available) and the extra entropy.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_number = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_number = \mt_rand(0, \mt_getrandmax());
        }

        // prefix for "uniqid()"
        $seed = $random_number . \session_id();
        $seed .= $_SERVER['REMOTE_ADDR'] ?? '';
        $seed .= $_SERVER['SERVER_ADDR'] ?? '';
        $seed .= $extra_entropy;

        $unique = \uniqid($seed, true);

        return $use_md5 ? \md5($unique . $seed) : $unique;
    }
2717
2718
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The unchanged result of "UTF8::string_has_bom()".</p>
     *
     * @see        UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2734
2735
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without "mbstring" the pattern is checked by "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2754
2755
    /**
     * Returns true if the string contains whitespace, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // without "mbstring" the pattern is checked by "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2774
2775
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without "mbstring" the pattern is checked by "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2794
2795
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2813
2814
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * @param string $hexdec <p>The hexadecimal code point representation: 4 to 6 hexadecimal digits,
     *                       optionally prefixed with "U+" or "\u".</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure (empty input, or anything
     *                   that is not a valid hexadecimal representation).</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // only real hexadecimal digits are accepted, otherwise "intval()" would silently
        // stop at the first non-hexadecimal letter (e.g. 'zzzz' => 0, 'U+12g4' => 18)
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2843
2844
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @param string   $str      <p>The input string.</p>
     * @param int|null $flags    <p>Passed through to "UTF8::html_entity_decode()".</p>
     * @param string   $encoding <p>Passed through to "UTF8::html_entity_decode()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2865
2866
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // One convmap entry consists of exactly four values: [start_code, end_code, offset, mask].
            // The map must not contain a trailing fifth value: PHP >= 8.0 throws a "ValueError"
            // if the number of values is not a multiple of four.
            $convmap = [
                $keep_ascii_chars ? 0x80 : 0x00, // with "keep ascii" only code points >= 0x80 are encoded
                0xfffff,
                0,
                0xfffff,
            ];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2937
2938
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so we are left with our own little solution here. Bummer.
     *
     * Convert all HTML entities to their applicable characters. The decoding is
     * repeated until the string does not change anymore.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>
     *                           The input string.
     *                           </p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
     *                           and which document type to use. If null is given, ENT_QUOTES | ENT_HTML5 is used.
     *                           <ul>
     *                           <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                           <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                           <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                           <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                           <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                           <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                           <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                           </ul>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // too short for an entity (examples: &; || &x;)
        if (!isset($str[3])) {
            return $str;
        }

        // no "&"
        if (\strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
        ) {
            if (self::$SUPPORT['mbstring'] === false) {
                /**
                 * @psalm-suppress ImpureFunctionCall - it is only a warning
                 */
                \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
            }
        }

        // decode until a pass does not change the string anymore
        while (\strpos($str, '&') !== false) {
            $str_before = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (the missing ";" is added, so that "html_entity_decode()" can handle them)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($str_before === $str) {
                break;
            }
        }

        return $str;
    }
3071
3072
    /**
     * Create an escaped html version of the string via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3090
3091
    /**
     * Remove empty html-tags (an opening tag directly followed by a closing tag,
     * with nothing but optional whitespace in between).
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without empty html-tags, or the unchanged input
     *                if the regular expression could not be applied.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // "preg_replace()" returns null on failure (e.g. malformed UTF-8 together with the "u"-modifier),
        // in that case the input is kept instead of silently returning an empty string
        return $stripped ?? $str;
    }
3110
3111
    /**
3112
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3113
     *
3114
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3115
     *
3116
     * @see http://php.net/manual/en/function.htmlentities.php
3117
     *
3118
     * @param string $str           <p>
3119
     *                              The input string.
3120
     *                              </p>
3121
     * @param int    $flags         [optional] <p>
3122
     *                              A bitmask of one or more of the following flags, which specify how to handle
3123
     *                              quotes, invalid code unit sequences and the used document type. The default is
3124
     *                              ENT_COMPAT | ENT_HTML401.
3125
     *                              <table>
3126
     *                              Available <i>flags</i> constants
3127
     *                              <tr valign="top">
3128
     *                              <td>Constant Name</td>
3129
     *                              <td>Description</td>
3130
     *                              </tr>
3131
     *                              <tr valign="top">
3132
     *                              <td><b>ENT_COMPAT</b></td>
3133
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3134
     *                              </tr>
3135
     *                              <tr valign="top">
3136
     *                              <td><b>ENT_QUOTES</b></td>
3137
     *                              <td>Will convert both double and single quotes.</td>
3138
     *                              </tr>
3139
     *                              <tr valign="top">
3140
     *                              <td><b>ENT_NOQUOTES</b></td>
3141
     *                              <td>Will leave both double and single quotes unconverted.</td>
3142
     *                              </tr>
3143
     *                              <tr valign="top">
3144
     *                              <td><b>ENT_IGNORE</b></td>
3145
     *                              <td>
3146
     *                              Silently discard invalid code unit sequences instead of returning
3147
     *                              an empty string. Using this flag is discouraged as it
3148
     *                              may have security implications.
3149
     *                              </td>
3150
     *                              </tr>
3151
     *                              <tr valign="top">
3152
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3153
     *                              <td>
3154
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3155
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3156
     *                              string.
3157
     *                              </td>
3158
     *                              </tr>
3159
     *                              <tr valign="top">
3160
     *                              <td><b>ENT_DISALLOWED</b></td>
3161
     *                              <td>
3162
     *                              Replace invalid code points for the given document type with a
3163
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3164
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3165
     *                              instance, to ensure the well-formedness of XML documents with
3166
     *                              embedded external content.
3167
     *                              </td>
3168
     *                              </tr>
3169
     *                              <tr valign="top">
3170
     *                              <td><b>ENT_HTML401</b></td>
3171
     *                              <td>
3172
     *                              Handle code as HTML 4.01.
3173
     *                              </td>
3174
     *                              </tr>
3175
     *                              <tr valign="top">
3176
     *                              <td><b>ENT_XML1</b></td>
3177
     *                              <td>
3178
     *                              Handle code as XML 1.
3179
     *                              </td>
3180
     *                              </tr>
3181
     *                              <tr valign="top">
3182
     *                              <td><b>ENT_XHTML</b></td>
3183
     *                              <td>
3184
     *                              Handle code as XHTML.
3185
     *                              </td>
3186
     *                              </tr>
3187
     *                              <tr valign="top">
3188
     *                              <td><b>ENT_HTML5</b></td>
3189
     *                              <td>
3190
     *                              Handle code as HTML 5.
3191
     *                              </td>
3192
     *                              </tr>
3193
     *                              </table>
3194
     *                              </p>
3195
     * @param string $encoding      [optional] <p>
3196
     *                              Like <b>htmlspecialchars</b>,
3197
     *                              <b>htmlentities</b> takes an optional third argument
3198
     *                              <i>encoding</i> which defines encoding used in
3199
     *                              conversion.
3200
     *                              Although this argument is technically optional, you are highly
3201
     *                              encouraged to specify the correct value for your code.
3202
     *                              </p>
3203
     * @param bool   $double_encode [optional] <p>
3204
     *                              When <i>double_encode</i> is turned off PHP will not
3205
     *                              encode existing html entities. The default is to convert everything.
3206
     *                              </p>
3207
     *
3208
     * @psalm-pure
3209
     *
3210
     * @return string
3211
     *                <p>
3212
     *                The encoded string.
3213
     *                <br><br>
3214
     *                If the input <i>string</i> contains an invalid code unit
3215
     *                sequence within the given <i>encoding</i> an empty string
3216
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3217
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3218
     *                </p>
3219
     */
3220 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through verbatim, everything else gets normalized first.
        $use_encoding_as_is = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$use_encoding_as_is) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // The native htmlentities() leaves backslashes untouched, so every backslash
        // is replaced by its numeric html entity here.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
3249
3250
    /**
3251
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3252
     *
3253
     * INFO: Take a look at "UTF8::htmlentities()"
3254
     *
3255
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3256
     *
3257
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3258
     *
3259
     * @param string $str           <p>
3260
     *                              The string being converted.
3261
     *                              </p>
3262
     * @param int    $flags         [optional] <p>
3263
     *                              A bitmask of one or more of the following flags, which specify how to handle
3264
     *                              quotes, invalid code unit sequences and the used document type. The default is
3265
     *                              ENT_COMPAT | ENT_HTML401.
3266
     *                              <table>
3267
     *                              Available <i>flags</i> constants
3268
     *                              <tr valign="top">
3269
     *                              <td>Constant Name</td>
3270
     *                              <td>Description</td>
3271
     *                              </tr>
3272
     *                              <tr valign="top">
3273
     *                              <td><b>ENT_COMPAT</b></td>
3274
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3275
     *                              </tr>
3276
     *                              <tr valign="top">
3277
     *                              <td><b>ENT_QUOTES</b></td>
3278
     *                              <td>Will convert both double and single quotes.</td>
3279
     *                              </tr>
3280
     *                              <tr valign="top">
3281
     *                              <td><b>ENT_NOQUOTES</b></td>
3282
     *                              <td>Will leave both double and single quotes unconverted.</td>
3283
     *                              </tr>
3284
     *                              <tr valign="top">
3285
     *                              <td><b>ENT_IGNORE</b></td>
3286
     *                              <td>
3287
     *                              Silently discard invalid code unit sequences instead of returning
3288
     *                              an empty string. Using this flag is discouraged as it
3289
     *                              may have security implications.
3290
     *                              </td>
3291
     *                              </tr>
3292
     *                              <tr valign="top">
3293
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3294
     *                              <td>
3295
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3296
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3297
     *                              string.
3298
     *                              </td>
3299
     *                              </tr>
3300
     *                              <tr valign="top">
3301
     *                              <td><b>ENT_DISALLOWED</b></td>
3302
     *                              <td>
3303
     *                              Replace invalid code points for the given document type with a
3304
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3305
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3306
     *                              instance, to ensure the well-formedness of XML documents with
3307
     *                              embedded external content.
3308
     *                              </td>
3309
     *                              </tr>
3310
     *                              <tr valign="top">
3311
     *                              <td><b>ENT_HTML401</b></td>
3312
     *                              <td>
3313
     *                              Handle code as HTML 4.01.
3314
     *                              </td>
3315
     *                              </tr>
3316
     *                              <tr valign="top">
3317
     *                              <td><b>ENT_XML1</b></td>
3318
     *                              <td>
3319
     *                              Handle code as XML 1.
3320
     *                              </td>
3321
     *                              </tr>
3322
     *                              <tr valign="top">
3323
     *                              <td><b>ENT_XHTML</b></td>
3324
     *                              <td>
3325
     *                              Handle code as XHTML.
3326
     *                              </td>
3327
     *                              </tr>
3328
     *                              <tr valign="top">
3329
     *                              <td><b>ENT_HTML5</b></td>
3330
     *                              <td>
3331
     *                              Handle code as HTML 5.
3332
     *                              </td>
3333
     *                              </tr>
3334
     *                              </table>
3335
     *                              </p>
3336
     * @param string $encoding      [optional] <p>
3337
     *                              Defines encoding used in conversion.
3338
     *                              </p>
3339
     *                              <p>
3340
     *                              For the purposes of this function, the encodings
3341
     *                              ISO-8859-1, ISO-8859-15,
3342
     *                              UTF-8, cp866,
3343
     *                              cp1251, cp1252, and
3344
     *                              KOI8-R are effectively equivalent, provided the
3345
     *                              <i>string</i> itself is valid for the encoding, as
3346
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3347
     *                              the same positions in all of these encodings.
3348
     *                              </p>
3349
     * @param bool   $double_encode [optional] <p>
3350
     *                              When <i>double_encode</i> is turned off PHP will not
3351
     *                              encode existing html entities, the default is to convert everything.
3352
     *                              </p>
3353
     *
3354
     * @psalm-pure
3355
     *
3356
     * @return string
3357
     *                <p>
3358
     *                The converted string.<br><br>
3359
     *                If the input <i>string</i> contains an invalid code unit
3360
     *                sequence within the given <i>encoding</i> an empty string
3361
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3362
     *                <b>ENT_SUBSTITUTE</b> flags are set.</p>
3363
     */
3364 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through verbatim, everything else gets normalized first.
        $use_encoding_as_is = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$use_encoding_as_is) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
3381
3382
    /**
     * Reports whether the "iconv" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3394
3395
    /**
     * Deprecated alias of "UTF8::decimal_to_chr()".
     *
     * @param int|string $int <p>Forwarded unchanged to "UTF8::decimal_to_chr()".</p>
     *
     * @psalm-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of "UTF8::decimal_to_chr()".</p>
     *
     * @see        UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3413
3414
    /**
     * Converts an integer into its hexadecimal "U+xxxx" code point notation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text that is put in front of the hexadecimal digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the hexadecimal digits, left-padded with zeros to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        $digits = \dechex($int);

        // short values are left-padded with zeros, longer ones are kept as they are
        if (\strlen($digits) < 4) {
            $digits = \substr('0000' . $digits, -4);
        }

        return $prefix . $digits;
    }
3436
3437
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3449
3450
    /**
     * Reports whether the "intl" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3462
3463
    /**
     * Deprecated alias of "UTF8::is_ascii()"; the call is forwarded to "ASCII::is_ascii()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The result of "ASCII::is_ascii()".</p>
     *
     * @see        UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3479
3480
    /**
     * Deprecated alias of "UTF8::is_base64()".
     *
     * @param string $str <p>Forwarded unchanged to "UTF8::is_base64()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The result of "UTF8::is_base64()".</p>
     *
     * @see        UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3496
3497
    /**
     * Deprecated alias of "UTF8::is_binary()".
     *
     * @param int|string $str    <p>Forwarded unchanged to "UTF8::is_binary()".</p>
     * @param bool       $strict [optional] <p>Forwarded unchanged to "UTF8::is_binary()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The result of "UTF8::is_binary()".</p>
     *
     * @see        UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, bool $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3514
3515
    /**
     * Deprecated alias of "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>Forwarded unchanged to "UTF8::is_bom()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The result of "UTF8::is_bom()".</p>
     *
     * @see        UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3531
3532
    /**
     * Deprecated alias of "UTF8::is_html()".
     *
     * @param string $str <p>Forwarded unchanged to "UTF8::is_html()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The result of "UTF8::is_html()".</p>
     *
     * @see        UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3548
3549
    /**
     * Deprecated alias of "UTF8::is_json()"; only the string is forwarded, so the
     * second parameter of "UTF8::is_json()" keeps its default value.
     *
     * @param string $str <p>Forwarded unchanged to "UTF8::is_json()".</p>
     *
     * @return bool
     *              <p>The result of "UTF8::is_json()".</p>
     *
     * @see        UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3563
3564
    /**
     * Deprecated alias of "UTF8::is_utf16()"; only the string is forwarded, so the
     * second parameter of "UTF8::is_utf16()" keeps its default value.
     *
     * @param string $str <p>Forwarded unchanged to "UTF8::is_utf16()".</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see        UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3583
3584
    /**
     * Deprecated alias of "UTF8::is_utf32()".
     *
     * @param string $str <p>Forwarded unchanged to "UTF8::is_utf32()".</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see        UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3603
3604
    /**
     * Deprecated alias of "UTF8::is_utf8()".
     *
     * @param string $str    <p>Forwarded unchanged to "UTF8::is_utf8()".</p>
     * @param bool   $strict [optional] <p>Forwarded unchanged to "UTF8::is_utf8()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The result of "UTF8::is_utf8()".</p>
     *
     * @see        UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, bool $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3621
3622
    /**
     * Checks whether the string consists of alphabetic chars only.
     *
     * The string is matched against "^[[:alpha:]]*$", via "mb_ereg_match()" when
     * mbstring support is flagged, otherwise via "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3641
3642
    /**
     * Checks whether the string consists of alphabetic and numeric chars only.
     *
     * The string is matched against "^[[:alnum:]]*$", via "mb_ereg_match()" when
     * mbstring support is flagged, otherwise via "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3661
3662
    /**
     * Checks whether the string consists of punctuation chars only.
     *
     * The string is matched against "^[[:punct:]]*$" via "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3676
3677
    /**
     * Checks whether the string consists of printable (non-invisible) chars only.
     *
     * The string counts as printable when "UTF8::remove_invisible_characters()"
     * returns it unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str): bool
    {
        $visible_only = self::remove_invisible_characters($str);

        return $str === $visible_only;
    }
3691
3692
    /**
     * Checks if a string is 7 bit ASCII; the check itself is done by "ASCII::is_ascii()".
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3711
3712
    /**
     * Checks whether the string is base64 encoded.
     *
     * The input is decoded in strict mode and encoded again; only a string that
     * survives this round trip unchanged is accepted.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string; every non-string value yields false.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }
3743
3744
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The checks run in this order: a string made of "0" and "1" only, the type
     * reported by "UTF8::get_file_type()", the share of NUL bytes and - in strict
     * mode only - the MIME encoding reported by ext-fileinfo.
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input  <p>The value to inspect; it is cast to a string first.</p>
     * @param bool       $strict [optional] <p>Also ask ext-fileinfo for the MIME encoding of the input.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the strict check is reached while fileinfo support is flagged as missing
     *
     * @return bool
     *              <p>Whether or not one of the checks classified the input as binary.</p>
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $bytes = (string) $input;
        if ($bytes === '') {
            return false;
        }

        // a string that consists of "0" and "1" only counts as binary
        if (\preg_match('~^[01]+$~', $bytes)) {
            return true;
        }

        $file_type = self::get_file_type($bytes);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // more than a quarter of NUL bytes counts as binary
        $byte_length = \strlen($bytes);
        $nul_ratio = \substr_count($bytes, "\x0", 0, $byte_length) / $byte_length;
        if ($nul_ratio > 0.25) {
            return true;
        }

        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($bytes);

        return $mime_encoding === 'binary';
    }
3795
3796
    /**
     * Check if the file is binary.
     *
     * Up to 512 bytes are read from the start of the file and handed to
     * "UTF8::is_binary()" in strict mode. A file that cannot be opened or that
     * yields no data is reported as not binary.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file <p>The path that is passed to "fopen()".</p>
     *
     * @return bool
     *              <p>Whether or not the start of the file looks like binary data.</p>
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === false || $head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3822
3823
    /**
     * Checks whether the string consists of whitespace chars only.
     *
     * The string is matched against "^[[:space:]]*$", via "mb_ereg_match()" when
     * mbstring support is flagged, otherwise via "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3842
3843
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string; every non-string value yields false.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if $str is exactly one of the known Byte Order Marks, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // The BOM table is keyed by the BOM bytes (BOM => byte length), so a key
        // lookup answers the question without walking the shared static table by
        // reference. The string check keeps the strict comparison of the former
        // loop: a non-string value never equals one of the string keys.
        return \is_string($str) && isset(self::$BOM[$str]);
    }
3868
3869
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it equals FALSE when converted to a boolean,
     * e.g. '', '0', 0, 0.0 or an empty array.
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // the parameter always exists, so empty() boils down to a boolean negation
        return !$str;
    }
3886
3887
    /**
     * Checks whether the string consists of hexadecimal chars only.
     *
     * The string is matched against "^[[:xdigit:]]*$", via "mb_ereg_match()" when
     * mbstring support is flagged, otherwise via "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3906
3907
    /**
     * Check if the string contains any HTML tags.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string; an empty string yields false.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // stays empty when the tag pattern does not match
        $found_tags = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $found_tags);

        return $found_tags !== [];
    }
3934
3935
    /**
     * Check if $url is a valid "http://" or "https://" url.
     *
     * @param string $url                <p>The url to check; an empty string yields false.</p>
     * @param bool   $disallow_localhost [optional] <p>
     *                                   Reject urls that start with a localhost / loopback host
     *                                   ("localhost", "127.0.0.1", "::1", "[::1]") or that contain ".localhost".
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $url passed the checks.</p>
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals inside an url are written in brackets (RFC 3986),
                    // so the loopback address has to be rejected in that form as well.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // everything the regex above did not accept is left to the native url filter
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3990
3991
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string; an empty string yields false.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if json support is flagged as missing
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a null result is only acceptable when the input itself spells "null" (in any case)
        $input_spells_null = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$input_spells_null) {
            return false;
        }

        $is_array_or_object = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid && !$is_array_or_object) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4031
4032
    /**
     * Checks whether the string consists of lowercase chars only.
     *
     * The string is matched against "^[[:lower:]]*$", via "mb_ereg_match()" when
     * mbstring support is flagged, otherwise via "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4049
4050
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with "unserialize()". Classes are not instantiated
     * while probing, so serialized objects are still detected but none of their
     * magic methods run.
     *
     * @param string $str <p>The input string; an empty string yields false.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // a serialized "false" can not be told apart from a failed unserialize() call
        if ($str === 'b:0;') {
            return true;
        }

        // "allowed_classes => false" turns every object into "__PHP_Incomplete_Class",
        // so a merely probed (possibly untrusted) string can not instantiate
        // arbitrary classes; the result is still different from false.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4072
4073
    /**
     * Checks whether the string consists of uppercase chars only.
     *
     * The string is matched against "^[[:upper:]]*$", via "mb_ereg_match()" when
     * mbstring support is flagged, otherwise via "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only uppercase characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4093
4094
    /**
4095
     * Check if the string is UTF-16.
4096
     *
4097
     * EXAMPLE: <code>
4098
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
4099
     * //
4100
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
4101
     * //
4102
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
4103
     * </code>
4104
     *
4105
     * @param string $str                       <p>The input string.</p>
4106
     * @param bool   $check_if_string_is_binary
4107
     *
4108
     * @psalm-pure
4109
     *
4110
     * @return false|int
4111
     *                   <strong>false</strong> if is't not UTF-16,<br>
4112
     *                   <strong>1</strong> for UTF-16LE,<br>
4113
     *                   <strong>2</strong> for UTF-16BE
4114
     */
4115 22
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
4116
    {
4117
        // init
4118 22
        $str = (string) $str;
4119 22
        $str_chars = [];
4120
4121
        if (
4122 22
            $check_if_string_is_binary
4123
            &&
4124 22
            !self::is_binary($str, true)
4125
        ) {
4126 2
            return false;
4127
        }
4128
4129 22
        if (self::$SUPPORT['mbstring'] === false) {
4130
            /**
4131
             * @psalm-suppress ImpureFunctionCall - is is only a warning
4132
             */
4133 3
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
4134
        }
4135
4136 22
        $str = self::remove_bom($str);
4137
4138 22
        $maybe_utf16le = 0;
4139 22
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
4140 22
        if ($test) {
4141 15
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
4142 15
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
4143 15
            if ($test3 === $test) {
4144
                /**
4145
                 * @psalm-suppress RedundantCondition
4146
                 */
4147 15
                if ($str_chars === []) {
4148 15
                    $str_chars = self::count_chars($str, true, false);
4149
                }
4150 15
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
0 ignored issues
show
Bug introduced by
The expression self::count_chars($test3) cannot be used as a reference.

Let?s assume that you have the following foreach statement:

foreach ($array as &$itemValue) { }

$itemValue is assigned by reference. This is possible because the expression (in the example $array) can be used as a reference target.

However, if we were to replace $array with something different like the result of a function call as in

foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.

Available Fixes

1. Do not assign by reference
foreach (getArray() as $itemValue) { }
2. Assign to a local variable first
$array = getArray();
foreach ($array as &$itemValue) {}
3. Return a reference
function &getArray() { $array = array(); return $array; }

foreach (getArray() as &$itemValue) { }
Loading history...
4151 15
                    if (\in_array($test3char, $str_chars, true)) {
4152 15
                        ++$maybe_utf16le;
4153
                    }
4154
                }
4155 15
                unset($test3charEmpty);
4156
            }
4157
        }
4158
4159 22
        $maybe_utf16be = 0;
4160 22
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
4161 22
        if ($test) {
4162 15
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
4163 15
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
4164 15
            if ($test3 === $test) {
4165 15
                if ($str_chars === []) {
4166 7
                    $str_chars = self::count_chars($str, true, false);
4167
                }
4168 15
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
0 ignored issues
show
Bug introduced by
The expression self::count_chars($test3) cannot be used as a reference.

Let's assume that you have the following foreach statement:

foreach ($array as &$itemValue) { }

$itemValue is assigned by reference. This is possible because the expression (in the example $array) can be used as a reference target.

However, if we were to replace $array with something different like the result of a function call as in

foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.

Available Fixes

1. Do not assign by reference
foreach (getArray() as $itemValue) { }
2. Assign to a local variable first
$array = getArray();
foreach ($array as &$itemValue) {}
3. Return a reference
function &getArray() { $array = array(); return $array; }

foreach (getArray() as &$itemValue) { }
Loading history...
4169 15
                    if (\in_array($test3char, $str_chars, true)) {
4170 15
                        ++$maybe_utf16be;
4171
                    }
4172
                }
4173 15
                unset($test3charEmpty);
4174
            }
4175
        }
4176
4177 22
        if ($maybe_utf16be !== $maybe_utf16le) {
4178 7
            if ($maybe_utf16le > $maybe_utf16be) {
4179 5
                return 1;
4180
            }
4181
4182 6
            return 2;
4183
        }
4184
4185 18
        return false;
4186
    }
4187
4188
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * For each byte order (LE, then BE) the input is converted to UTF-8 and round-tripped back;
     * if the round trip is stable, a score is incremented for every key of
     * self::count_chars() of the converted text that is found (strictly) in
     * self::count_chars() of the BOM-stripped input. The byte order with the higher score wins.
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>When true, return false early unless
     *                                          self::is_binary($str, true) reports the input as binary.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32 (or both byte orders score equally),<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $maybe_utf32le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                // Only the keys are needed; iterating them by value avoids binding a
                // reference to the temporary array returned by count_chars().
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32le;
                    }
                }
            }
        }

        $maybe_utf32be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
            if ($test3 === $test) {
                // Reuse the input statistics if the LE pass already computed them.
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32be;
                    }
                }
            }
        }

        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        // Equal scores (including 0/0): no byte order can be preferred.
        return false;
    }
4281
4282
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked. For an array every element is
     *                                         checked recursively; an empty array yields true.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // The elements are only read, so iterate by value: a by-reference loop would
            // leave a dangling reference and force a copy of the caller's array.
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        // Scalars and null are cast to string before the actual check.
        return self::is_utf8_string((string) $str, $strict);
    }
4312
4313
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string, after passing it through self::filter().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, handed to \json_decode() unchanged.
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the "json" support flag is false (ext-json not installed)
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is filtered first, exactly as before, even if decoding turns out to be unavailable.
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4367
4368
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value, after passing it through self::filter().
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the "json" support flag is false (ext-json not installed)
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is filtered first, exactly as before, even if encoding turns out to be unavailable.
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4421
4422
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is detected by the existence of the json_decode() function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $decoder_exists = \function_exists('json_decode');

        return $decoder_exists;
    }
4434
4435
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * For "UTF-8" without a language and without length-keeping the plain mb_* functions are used;
     * every other combination is delegated to self::strtolower() (and self::substr() for other encodings).
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Non UTF-8 input: normalize the encoding name and use the library helpers throughout.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // Fast path: no language specific rules and no length-keeping requested.
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $tail;
        }

        $head = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $head . $tail;
    }
4497
4498
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4529
4530
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split with self::str_to_words(); every non-empty part is passed to
     * self::lcfirst() unless it is listed in $exceptions, and the parts are concatenated again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words (strict comparison);
     *                                                   these are appended unchanged.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a truthiness check would also discard the string "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // The words are only read, so no by-reference iteration (and no dangling reference) is needed.
        foreach ($words as $word) {
            // Skip empty parts only; the word "0" must be kept.
            if ((string) $word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4583
4584
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4615
4616
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * With mbstring support the replacement is done via \mb_ereg_replace(), otherwise via
     * self::regex_replace().
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; when null, "\s" is used.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter form is deprecated since PHP 8.2.
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            // Here "/" is quoted as well, as it is passed to preg_quote() as the delimiter.
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4656
4657
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings
     *                                  (an array is joined into one string first).</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // No code points at all: there is no maximum to report.
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4683
4684
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars, or 0 if self::chr_size_list() yields nothing.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4706
4707
    /**
     * Checks whether mbstring is available on the server.
     *
     * Availability is detected via \extension_loaded('mbstring').
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4719
4720
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings
     *                             (an array is joined into one string first).</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // No code points at all: there is no minimum to report.
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4747
4748
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4765
4766
    /**
     * Normalize the encoding-"name" input.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * Resolution order: empty input -> fallback; a few exact fast-path names; the per-request cache;
     * the list from self::getData('encodings'); finally an alias table that is looked up with the
     * upper-cased name stripped of everything except A-Z and 0-9. Names not found in the alias table are
     * returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @psalm-param string|TNormalizeEncodingFallback $fallback
     * @psalm-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        if (!$encoding) {
            return $fallback;
        }

        // Exact-match fast paths for the most common names (no cache involved).
        $fast_paths = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($fast_paths[$encoding])) {
            return $fast_paths[$encoding];
        }

        // only a fallback, for non "strict_types" usage ...
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // Load the list of known encoding names on first use.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $normalized = \strtoupper($encoding);
        // Alias lookup key: upper-cased name without separators such as "-", "_" or spaces.
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $normalized);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Every alias target is a non-empty string, so isset() is sufficient here.
        if (isset($equivalences[$alias_key])) {
            $normalized = $equivalences[$alias_key];
        }

        // Cache under the name exactly as it was passed in.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4931
4932
    /**
     * Normalize every line ending ("\r\n", "\r" and "\n") of the string to the given replacement.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\n"); // "a\nb\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacement e.g. "\n" (Linux) or "\r\n" (Windows). You can also use
     *                                  \PHP_EOL here. An array is paired by position with "\r\n", "\r" and "\n",
     *                                  missing entries are treated as an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        $line_endings = ["\r\n", "\r", "\n"];

        if (\is_array($replacer)) {
            // same positional pairing (keys are ignored) as "str_replace()" with two arrays
            $replacements = \array_values($replacer);
        } else {
            $replacements = \array_fill(0, 3, (string) $replacer);
        }

        $replace_pairs = [];
        foreach ($line_endings as $index => $line_ending) {
            $replace_pairs[$line_ending] = isset($replacements[$index]) ? (string) $replacements[$index] : '';
        }

        // "strtr()" works in ONE pass (longest match first) and never re-scans already
        // replaced text. A sequential "str_replace()" would expand a replacer that itself
        // contains "\r" or "\n" again (e.g. "\r\n" became "\r\r\n\r\n").
        return \strtr($str, $replace_pairs);
    }
4948
4949
    /**
     * Normalize some MS Word special characters.
     *
     * This is a thin wrapper, the work is delegated to "ASCII::normalize_msword()".
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4965
4966
    /**
     * Normalize the whitespace.
     *
     * This is a thin wrapper, the work is delegated to "ASCII::normalize_whitespace()".
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4992
4993
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * The result is looked up in this order: per-request cache, the "ord" data table,
     * "IntlChar::ord()" (if available) and finally a manual decoding of the leading bytes.
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // only encoding names other than the two well-known ones need to be normalized
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache key is built from the ORIGINAL input + the normalized encoding
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        //
        // lookup via the "ord" data table (lazy loaded)
        //

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cache_key] = self::$ORD[$chr];

            return $CHAR_CACHE[$cache_key];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code_point = \IntlChar::ord($chr);
            if ($intl_code_point) {
                $CHAR_CACHE[$cache_key] = $intl_code_point;

                return $CHAR_CACHE[$cache_key];
            }
        }

        //
        // fallback via vanilla php: decode (at most) the first four bytes by hand
        //

        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // 4-byte sequence
            $code_point = (($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // 3-byte sequence
            $code_point = (($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // 2-byte sequence
            $code_point = (($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or nothing to decode at all)
            $code_point = $lead_byte;
        }

        $CHAR_CACHE[$cache_key] = (int) $code_point;

        return $CHAR_CACHE[$cache_key];
    }
5087
5088
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            return $result !== [];
        }

        $is_parsed = \mb_parse_str($input, $result);

        return $is_parsed !== false && $result !== [];
    }
5129
5130
    /**
     * Checks if the "/u" pattern modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
5146
5147
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @throws \InvalidArgumentException if $step is not a positive number
     * @throws \RuntimeException         if $use_ctype is requested, but "ext-ctype" is not installed
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An empty array is returned if one of the boundaries is empty or resolves to the code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype checks always get STRINGS: an integer between -128 and 255 would
        // otherwise be interpreted as the ASCII value of a single character
        // (deprecated since PHP 8.1), which made the hex detection inconsistent.
        $var1_as_string = (string) $var1;
        $var2_as_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;
        if ($use_ctype) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($var1_as_string) && \ctype_digit($var2_as_string)) {
                $is_digit = true;
            } elseif (\ctype_xdigit($var1_as_string) && \ctype_xdigit($var2_as_string)) {
                $is_xdigit = true;
            }
        }

        // Resolve one boundary into a code point, both boundaries share the detected notation.
        $to_code_point = static function ($value) use ($is_digit, $is_xdigit, $use_ctype): int {
            if ($is_digit) {
                return (int) $value;
            }

            if ($is_xdigit) {
                return (int) self::hex_to_int((string) $value);
            }

            if (!$use_ctype && \is_numeric($value)) {
                return (int) $value;
            }

            return self::ord((string) $value);
        };

        $start = $to_code_point($var1);
        if (!$start) {
            return [];
        }

        $end = $to_code_point($var2);
        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5241
5242
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without one of these markers there is nothing to decode
        $needs_decoding = \strpos($str, '&') !== false
                          ||
                          \strpos($str, '%') !== false
                          ||
                          \strpos($str, '+') !== false
                          ||
                          \strpos($str, '\u') !== false;

        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding round is always done, further rounds only
        // for "$multi_decode" and only as long as the string still changes
        do {
            $str_before = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $str_before !== $str);

        return $str;
    }
5318
5319
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always executed with the "u" (UTF-8) modifier, the options "msr" are mapped to "ms".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5354
5355
    /**
     * Alias of "UTF8::remove_bom()", the call is forwarded unchanged.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5371
5372
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked once (in table order) against the start of the
     * string, so a BOM that only becomes visible after a previous one was removed is stripped as well.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            $bom_string = (string) $bom_string;

            // Compare only the leading bytes: "strpos() === 0" would scan the whole
            // string for every single BOM before it reports "not at position 0".
            // An empty BOM key can never be a real prefix, so it is skipped.
            if (
                $bom_string === ''
                ||
                \strncmp($str, $bom_string, \strlen($bom_string)) !== 0
            ) {
                continue;
            }

            // "substr()" may return false (PHP < 8) if nothing is left, so cast to string
            $str = (string) \substr($str, (int) $bom_byte_length);
        }

        return $str;
    }
5407
5408
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Only directly consecutive repetitions are collapsed into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_array($what)) {
            return $str;
        }

        foreach ($what as $item) {
            $item = (string) $item;
            if ($item === '') {
                // an empty needle has no duplicates
                continue;
            }

            // The replacement is the captured group ('$1') and NOT the raw $item:
            // "preg_replace()" interprets "$n" / "\n" / "\\" inside of the replacement,
            // so needles like '$0' or '\\\\' were not (or wrongly) de-duplicated.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5438
5439
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (an empty string)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5456
5457
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair is handled as ONE break, so it is replaced by a single $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // INFO: "#" is the delimiter here, so there must be no leading "/" in front of "\r\n",
        //       otherwise a "/" before a line break is removed and "\r\n" is replaced twice.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
5472
5473
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * This is a thin wrapper, the work is delegated to "ASCII::remove_invisible_characters()".
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        $visible_only = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $visible_only;
    }
5507
5508
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is detected via a byte-wise comparison, an empty $substring never matches.
     *
     * EXAMPLE: <code>UTF8::remove_left('κόσμε-foo', 'κόσμε'); // '-foo'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // INFO: compare against '' (and not via truthiness), otherwise the prefix "0" is never removed;
        //       "strncmp()" only looks at the leading bytes instead of scanning the whole string
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5545
5546
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix is detected via a byte-wise comparison, an empty $substring never matches.
     *
     * EXAMPLE: <code>UTF8::remove_right('foo-κόσμε', 'κόσμε'); // 'foo-'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // INFO: compare against '' (and not via truthiness), otherwise the suffix "0" is never removed
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5584
5585
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * The case-sensitive variant uses "str_replace()", the case-insensitive one "UTF8::str_ireplace()".
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5610
5611
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * The case-sensitive variant uses "str_replace()", the case-insensitive one "UTF8::str_ireplace()".
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5636
5637
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" only accepts "none", "long", "entity" or a code point,
            // so the replacement string itself must not be passed (ValueError with PHP >= 8,
            // ignored with a warning before). Invalid bytes are therefore turned into U+FFFD
            // first, the str_replace() below then maps U+FFFD to the real replacement.
            $substitute_character = $replacement_char === '' ? 'none' : 0xFFFD;

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the "finally" block
             */
            $save = \mb_substitute_character();

            try {
                \mb_substitute_character($substitute_character);

                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global setting, even if the conversion fails
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of the diamond question mark (U+FFFD)
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5695
5696
    /**
5697
     * Strip whitespace or other characters from the end of a UTF-8 string.
5698
     *
5699
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
5700
     *
5701
     * @param string      $str   <p>The string to be trimmed.</p>
5702
     * @param string|null $chars <p>Optional characters to be stripped.</p>
5703
     *
5704
     * @psalm-pure
5705
     *
5706
     * @return string
5707
     *                <p>A string with unwanted characters stripped from the right.</p>
5708
     */
5709 21
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means "strip nothing" (like the native rtrim()).
        // Without this guard the pattern below would be the invalid, empty
        // character class "[]+$".
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "[{$chars}]+$";
            } else {
                // default: strip trailing whitespace
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without native "mbstring": the "/" is quoted as well here
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5737
5738
    /**
5739
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
5740
     *
5741
     * @param bool $useEcho
5742
     *
5743
     * @psalm-pure
5744
     *
5745
     * @return string
5746
     */
5747 2
    public static function showSupport(bool $useEcho = true)
    {
        // one "key - value" row per entry of the support table
        $rows = '';
        foreach (self::$SUPPORT as $name => $state) {
            $rows .= $name . ' - ' . \print_r($state, true) . "\n<br>";
        }

        $html = '<pre>' . $rows . '</pre>';

        // the markup is printed on request, and returned in every case
        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5765
5766
    /**
5767
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
5768
     *
5769
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
5770
     *
5771
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
5772
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
5773
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
5774
     *
5775
     * @psalm-pure
5776
     *
5777
     * @return string
5778
     *                <p>The HTML numbered entity for the given character.</p>
5779
     */
5780 2
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to encode
        if ($char === '') {
            return '';
        }

        // ASCII input is only passed through untouched when the caller asks for it
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);

        return $is_kept_ascii
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5799
5800
    /**
5801
     * @param string $str
5802
     * @param int    $tab_length
5803
     *
5804
     * @psalm-pure
5805
     *
5806
     * @return string
5807
     */
5808 5
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // A run of zero (or fewer) spaces can never be matched, so the string is
        // returned unchanged. This also keeps a negative length away from
        // str_repeat(), which throws a ValueError on PHP >= 8.
        if ($tab_length < 1) {
            return $str;
        }

        // every run of exactly $tab_length spaces becomes one tab
        return \str_replace(\str_repeat(' ', $tab_length), "\t", $str);
    }
5820
5821
    /**
5822
     * alias for "UTF8::str_split()"
5823
     *
5824
     * @param int|string $str
5825
     * @param int        $length
5826
     * @param bool       $clean_utf8
5827
     *
5828
     * @psalm-pure
5829
     *
5830
     * @return string[]
5831
     *
5832
     * @see        UTF8::str_split()
5833
     * @deprecated <p>please use "UTF8::str_split()"</p>
5834
     */
5835 9
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        // deprecated alias: all arguments are forwarded unchanged to str_split()
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5843
5844
    /**
5845
     * alias for "UTF8::str_starts_with()"
5846
     *
5847
     * @param string $haystack
5848
     * @param string $needle
5849
     *
5850
     * @psalm-pure
5851
     *
5852
     * @return bool
5853
     *
5854
     * @see        UTF8::str_starts_with()
5855
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
5856
     */
5857 1
    public static function str_begins(string $haystack, string $needle): bool
    {
        // deprecated alias: the check itself lives in str_starts_with()
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5861
5862
    /**
5863
     * Returns a camelCase version of the string. Trims surrounding spaces,
5864
     * capitalizes letters following digits, spaces, dashes and underscores,
5865
     * and removes spaces, dashes, as well as underscores.
5866
     *
5867
     * @param string      $str                           <p>The input string.</p>
5868
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
5869
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5870
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5871
     *                                                   tr</p>
5872
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
5873
     *                                                   -> ß</p>
5874
     *
5875
     * @psalm-pure
5876
     *
5877
     * @return string
5878
     */
5879 32
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character, then drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper()" is only used when no language specific
        // handling and no length preservation was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-cases one matched fragment; shared by both replacement passes.
         *
         * @param string $fragment
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $fragment, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($fragment, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            // for "UTF-8" the encoding argument is deliberately omitted
            return $encoding === 'UTF-8'
                ? \mb_strtoupper($fragment)
                : \mb_strtoupper($fragment, $encoding);
        };

        // pass 1: remove separators and upper-case the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                // trailing separators have no following character and are just removed
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // pass 2: upper-case the character that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5955
5956
    /**
5957
     * Returns the string with the first letter of each word capitalized,
5958
     * except for when the word is a name which shouldn't be capitalized.
5959
     *
5960
     * @param string $str
5961
     *
5962
     * @psalm-pure
5963
     *
5964
     * @return string
5965
     *                <p>A string with $str capitalized.</p>
5966
     */
5967 1
    public static function str_capitalize_name(string $str): string
    {
        // first pass: collapse the whitespace and handle the space separated parts
        $by_space = self::str_capitalize_name_helper(self::collapse_whitespace($str), ' ');

        // second pass: handle the dash separated parts of the intermediate result
        return self::str_capitalize_name_helper($by_space, '-');
    }
5977
5978
    /**
5979
     * Returns true if the string contains $needle, false otherwise. By default
5980
     * the comparison is case-sensitive, but can be made insensitive by setting
5981
     * $case_sensitive to false.
5982
     *
5983
     * @param string $haystack       <p>The input string.</p>
5984
     * @param string $needle         <p>Substring to look for.</p>
5985
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5986
     *
5987
     * @psalm-pure
5988
     *
5989
     * @return bool
5990
     *              <p>Whether or not $haystack contains $needle.</p>
5991
     */
5992 21
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // byte-wise search for the exact match, multibyte search when the case is ignored
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
6003
6004
    /**
6005
     * Returns true if the string contains all $needles, false otherwise. By
6006
     * default the comparison is case-sensitive, but can be made insensitive by
6007
     * setting $case_sensitive to false.
6008
     *
6009
     * @param string $haystack       <p>The input string.</p>
6010
     * @param array  $needles        <p>SubStrings to look for.</p>
6011
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6012
     *
6013
     * @psalm-pure
6014
     *
6015
     * @return bool
6016
     *              <p>Whether or not $haystack contains all $needles.</p>
6017
     */
6018 45
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // null, arrays and objects can never be found
            if (!\is_scalar($needle)) {
                return false;
            }

            // Compare against the empty string instead of using a truthiness
            // check, otherwise the valid needle "0" would be rejected.
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            // exactly one search per needle, depending on the requested mode
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6043
6044
    /**
6045
     * Returns true if the string contains any $needles, false otherwise. By
6046
     * default the comparison is case-sensitive, but can be made insensitive by
6047
     * setting $case_sensitive to false.
6048
     *
6049
     * @param string $haystack       <p>The input string.</p>
6050
     * @param array  $needles        <p>SubStrings to look for.</p>
6051
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6052
     *
6053
     * @psalm-pure
6054
     *
6055
     * @return bool
6056
     *              <p>Whether or not $haystack contains any of the $needles.</p>
6057
     */
6058 46
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // null, arrays and objects can never be found: skip them
            if (!\is_scalar($needle)) {
                continue;
            }

            // Compare against the empty string instead of using a truthiness
            // check, otherwise the valid needle "0" would be skipped.
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit is enough
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6088
6089
    /**
6090
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
6091
     * inserted before uppercase characters (with the exception of the first
6092
     * character of the string), and in place of spaces as well as underscores.
6093
     *
6094
     * @param string $str      <p>The input string.</p>
6095
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6096
     *
6097
     * @psalm-pure
6098
     *
6099
     * @return string
6100
     */
6101 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // dasherizing is delimiting with a dash as separator
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6105
6106
    /**
6107
     * Returns a lowercase and trimmed string separated by the given delimiter.
6108
     * Delimiters are inserted before uppercase characters (with the exception
6109
     * of the first character of the string), and in place of spaces, dashes,
6110
     * and underscores. Alpha delimiters are not converted to lowercase.
6111
     *
6112
     * @param string      $str                           <p>The input string.</p>
6113
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
6114
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
6115
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
6116
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
6117
     *                                                   tr</p>
6118
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
6119
     *                                                   ß</p>
6120
     *
6121
     * @psalm-pure
6122
     *
6123
     * @return string
6124
     */
6125 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the "mb_ereg_*" functions are used with native mbstring, "preg_*" otherwise
        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // step 1: put a dash in front of every upper-case letter inside a word
        $trimmed = \trim($str);
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lower-case, via the plain mb-function whenever no special handling is requested
        if ($lang === null && !$try_to_keep_the_string_length && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: every run of dashes, underscores and whitespace becomes the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6159
6160
    /**
6161
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
6162
     *
6163
     * EXAMPLE: <code>
6164
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
6165
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
6166
     * </code>
6167
     *
6168
     * @param string $str <p>The input string.</p>
6169
     *
6170
     * @psalm-pure
6171
     *
6172
     * @return false|string
6173
     *                      <p>
6174
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
6175
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
6176
     *                      </p>
6177
     */
6178 31
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true)) {
            // UTF-32 is checked before UTF-16; 1 means little endian, 2 means big endian
            $utf32_type = self::is_utf32($str, false);
            if ($utf32_type === 1 || $utf32_type === 2) {
                return $utf32_type === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($str, false);
            if ($utf16_type === 1 || $utf16_type === 2) {
                return $utf16_type === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // candidates in the order in which they are tried
            $encoding_detecting_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $encoding_detecting_order, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        // the encoding list is loaded lazily, on first use
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);

            // a candidate is accepted when the string survives the conversion unchanged
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
6290
6291
    /**
6292
     * alias for "UTF8::str_ends_with()"
6293
     *
6294
     * @param string $haystack
6295
     * @param string $needle
6296
     *
6297
     * @psalm-pure
6298
     *
6299
     * @return bool
6300
     *
6301
     * @see        UTF8::str_ends_with()
6302
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
6303
     */
6304 1
    public static function str_ends(string $haystack, string $needle): bool
    {
        // deprecated alias: the check itself lives in str_ends_with()
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6308
6309
    /**
6310
     * Check if the string ends with the given substring.
6311
     *
6312
     * EXAMPLE: <code>
6313
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6314
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
6315
     * </code>
6316
     *
6317
     * @param string $haystack <p>The string to search in.</p>
6318
     * @param string $needle   <p>The substring to search for.</p>
6319
     *
6320
     * @psalm-pure
6321
     *
6322
     * @return bool
6323
     */
6324 9
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needle_length = \strlen($needle);

        // every string ends with the empty string
        if ($needle_length === 0) {
            return true;
        }

        // an empty string cannot end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        // byte-wise comparison of the tail of the haystack
        $tail = \substr($haystack, -$needle_length);

        return $tail === $needle;
    }
6336
6337
    /**
6338
     * Returns true if the string ends with any of $substrings, false otherwise.
6339
     *
6340
     * - case-sensitive
6341
     *
6342
     * @param string   $str        <p>The input string.</p>
6343
     * @param string[] $substrings <p>Substrings to look for.</p>
6344
     *
6345
     * @psalm-pure
6346
     *
6347
     * @return bool
6348
     *              <p>Whether or not $str ends with $substring.</p>
6349
     */
6350 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list never matches: the loop body is simply not entered
        foreach ($substrings as $candidate) {
            // byte-wise comparison of the tail of the string
            $tail = \substr($str, -\strlen($candidate));
            if ($tail !== $candidate) {
                continue;
            }

            return true;
        }

        return false;
    }
6364
6365
    /**
6366
     * Ensures that the string begins with $substring. If it doesn't, it's
6367
     * prepended.
6368
     *
6369
     * @param string $str       <p>The input string.</p>
6370
     * @param string $substring <p>The substring to add if not present.</p>
6371
     *
6372
     * @psalm-pure
6373
     *
6374
     * @return string
6375
     */
6376 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        // nothing to do if the (non-empty) prefix is already in place
        $has_prefix = $substring !== ''
            && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $has_prefix ? $str : $substring . $str;
    }
6388
6389
    /**
6390
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
6391
     *
6392
     * @param string $str       <p>The input string.</p>
6393
     * @param string $substring <p>The substring to add if not present.</p>
6394
     *
6395
     * @psalm-pure
6396
     *
6397
     * @return string
6398
     */
6399 10
    public static function str_ensure_right(string $str, string $substring): string
    {
        // the suffix is only skipped when both strings are non-empty
        // and the string already ends with it (byte-wise comparison)
        $has_suffix = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        return $has_suffix ? $str : $str . $substring;
    }
6413
6414
    /**
6415
     * Capitalizes the first word of the string, replaces underscores with
6416
     * spaces, and strips '_id'.
6417
     *
6418
     * @param string $str
6419
     *
6420
     * @psalm-pure
6421
     *
6422
     * @return string
6423
     */
6424 3
    public static function str_humanize($str): string
    {
        // "_id" is removed first, every remaining underscore becomes a space
        $search = ['_id', '_'];
        $replace = ['', ' '];
        $humanized = \str_replace($search, $replace, $str);

        // trim the result and upper-case its first character
        return self::ucfirst(\trim($humanized));
    }
6440
6441
    /**
6442
     * alias for "UTF8::str_istarts_with()"
6443
     *
6444
     * @param string $haystack
6445
     * @param string $needle
6446
     *
6447
     * @psalm-pure
6448
     *
6449
     * @return bool
6450
     *
6451
     * @see        UTF8::str_istarts_with()
6452
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
6453
     */
6454 1
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // deprecated alias: the check itself lives in str_istarts_with()
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
6458
6459
    /**
6460
     * alias for "UTF8::str_iends_with()"
6461
     *
6462
     * @param string $haystack
6463
     * @param string $needle
6464
     *
6465
     * @psalm-pure
6466
     *
6467
     * @return bool
6468
     *
6469
     * @see        UTF8::str_iends_with()
6470
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
6471
     */
6472 1
    public static function str_iends(string $haystack, string $needle): bool
    {
        // deprecated alias: the check itself lives in str_iends_with()
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
6476
6477
    /**
6478
     * Check if the string ends with the given substring, case-insensitive.
6479
     *
6480
     * EXAMPLE: <code>
6481
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6482
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6483
     * </code>
6484
     *
6485
     * @param string $haystack <p>The string to search in.</p>
6486
     * @param string $needle   <p>The substring to search for.</p>
6487
     *
6488
     * @psalm-pure
6489
     *
6490
     * @return bool
6491
     */
6492 12
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        $needle_length = \strlen($needle);

        // every string ends with the empty string
        if ($needle_length === 0) {
            return true;
        }

        // an empty string cannot end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        // the tail is cut off by the byte length of the needle
        // and then compared without regard to the case
        $tail = \substr($haystack, -$needle_length);

        return self::strcasecmp($tail, $needle) === 0;
    }
6504
6505
    /**
6506
     * Returns true if the string ends with any of $substrings, false otherwise.
6507
     *
6508
     * - case-insensitive
6509
     *
6510
     * @param string   $str        <p>The input string.</p>
6511
     * @param string[] $substrings <p>Substrings to look for.</p>
6512
     *
6513
     * @psalm-pure
6514
     *
6515
     * @return bool
6516
     *              <p>Whether or not $str ends with $substring.</p>
6517
     */
6518 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list never matches: the loop body is simply not entered
        foreach ($substrings as $candidate) {
            if (!self::str_iends_with($str, $candidate)) {
                continue;
            }

            // the first case-insensitive hit is enough
            return true;
        }

        return false;
    }
6532
6533
    /**
6534
     * Returns the index of the first occurrence of $needle in the string,
6535
     * and false if not found. Accepts an optional offset from which to begin
6536
     * the search.
6537
     *
6538
     * @param string $str      <p>The input string.</p>
6539
     * @param string $needle   <p>Substring to look for.</p>
6540
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6541
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6542
     *
6543
     * @psalm-pure
6544
     *
6545
     * @return false|int
6546
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6547
     *
6548
     * @see        UTF8::stripos()
6549
     * @deprecated <p>please use "UTF8::stripos()"</p>
6550
     */
6551 1
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // deprecated alias: all arguments are forwarded unchanged to stripos()
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
6564
6565
    /**
6566
     * Returns the index of the last occurrence of $needle in the string,
6567
     * and false if not found. Accepts an optional offset from which to begin
6568
     * the search. Offsets may be negative to count from the last character
6569
     * in the string.
6570
     *
6571
     * @param string $str      <p>The input string.</p>
6572
     * @param string $needle   <p>Substring to look for.</p>
6573
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6574
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6575
     *
6576
     * @psalm-pure
6577
     *
6578
     * @return false|int
6579
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6580
     *
6581
     * @see        UTF8::strripos()
6582
     * @deprecated <p>please use "UTF8::strripos()"</p>
6583
     */
6584 10
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: the lookup is delegated unchanged to self::strripos().
        $last_index = self::strripos($str, $needle, $offset, $encoding);

        return $last_index;
    }
6597
6598
    /**
6599
     * Returns the index of the first occurrence of $needle in the string,
6600
     * and false if not found. Accepts an optional offset from which to begin
6601
     * the search.
6602
     *
6603
     * @param string $str      <p>The input string.</p>
6604
     * @param string $needle   <p>Substring to look for.</p>
6605
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6606
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6607
     *
6608
     * @psalm-pure
6609
     *
6610
     * @return false|int
6611
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6612
     *
6613
     * @see        UTF8::strpos()
6614
     * @deprecated <p>please use "UTF8::strpos()"</p>
6615
     */
6616 11
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: the lookup is delegated unchanged to self::strpos().
        $first_index = self::strpos($str, $needle, $offset, $encoding);

        return $first_index;
    }
6629
6630
    /**
6631
     * Returns the index of the last occurrence of $needle in the string,
6632
     * and false if not found. Accepts an optional offset from which to begin
6633
     * the search. Offsets may be negative to count from the last character
6634
     * in the string.
6635
     *
6636
     * @param string $str      <p>The input string.</p>
6637
     * @param string $needle   <p>Substring to look for.</p>
6638
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6639
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6640
     *
6641
     * @psalm-pure
6642
     *
6643
     * @return false|int
6644
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6645
     *
6646
     * @see        UTF8::strrpos()
6647
     * @deprecated <p>please use "UTF8::strrpos()"</p>
6648
     */
6649 10
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: the lookup is delegated unchanged to self::strrpos().
        $last_index = self::strrpos($str, $needle, $offset, $encoding);

        return $last_index;
    }
6662
6663
    /**
6664
     * Inserts $substring into the string at the $index provided.
6665
     *
6666
     * @param string $str       <p>The input string.</p>
6667
     * @param string $substring <p>String to be inserted.</p>
6668
     * @param int    $index     <p>The index at which to insert the substring.</p>
6669
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6670
     *
6671
     * @psalm-pure
6672
     *
6673
     * @return string
6674
     */
6675 8
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: normalize the charset name and go through the class helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $total = (int) self::strlen($str, $encoding);
            if ($index > $total) {
                // An index past the end leaves the input untouched.
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $total, $encoding);

            return $head . $substring . $tail;
        }

        // Fast path for the default encoding: call mbstring directly.
        $total = (int) \mb_strlen($str);
        if ($index > $total) {
            // An index past the end leaves the input untouched.
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $total);

        return $head . $substring . $tail;
    }
6704
6705
    /**
6706
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6707
     *
6708
     * EXAMPLE: <code>
6709
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6710
     * </code>
6711
     *
6712
     * @see http://php.net/manual/en/function.str-ireplace.php
6713
     *
6714
     * @param string|string[] $search      <p>
6715
     *                                     Every replacement with search array is
6716
     *                                     performed on the result of previous replacement.
6717
     *                                     </p>
6718
     * @param string|string[] $replacement <p>The replacement.</p>
6719
     * @param string|string[] $subject     <p>
6720
     *                                     If subject is an array, then the search and
6721
     *                                     replace is performed with every entry of
6722
     *                                     subject, and the return value is an array as
6723
     *                                     well.
6724
     *                                     </p>
6725
     * @param int             $count       [optional] <p>
6726
     *                                     The number of matched and replaced needles will
6727
     *                                     be returned in count which is passed by
6728
     *                                     reference.
6729
     *                                     </p>
6730
     *
6731
     * @psalm-pure
6732
     *
6733
     * @return string|string[]
6734
     *                         <p>A string or an array of replacements.</p>
6735
     *
6736
     * @template TStrIReplaceSubject
6737
     * @psalm-param TStrIReplaceSubject $subject
6738
     * @psalm-return TStrIReplaceSubject
6739
     */
6740 29
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Turn every needle into a case-insensitive, UTF-8 aware regex.
        // Built into a fresh array so no foreach reference is left dangling.
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // An empty needle is mapped to a pattern that can never match
            // (start-of-string preceded by a character), so it replaces nothing.
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() interprets "$n" and "\n" inside the replacement as
        // backreferences. This method mirrors str_ireplace(), where the
        // replacement is plain text, so "\" and "$" are escaped to be literal.
        $to_literal = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replacement = \is_array($replacement)
            ? \array_map($to_literal, $replacement)
            : $to_literal($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6762
6763
    /**
6764
     * Replaces $search from the beginning of string with $replacement.
6765
     *
6766
     * @param string $str         <p>The input string.</p>
6767
     * @param string $search      <p>The string to search for.</p>
6768
     * @param string $replacement <p>The replacement.</p>
6769
     *
6770
     * @psalm-pure
6771
     *
6772
     * @return string
6773
     *                <p>The string after the replacement.</p>
6774
     */
6775 17
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Historic contract kept as-is: an empty $search appends the replacement.
        // This also covers the empty-$str cases ('' . $replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        // Nothing can be found in an empty string.
        if ($str === '') {
            return '';
        }

        // Compare on UTF-8 characters, not bytes: the byte-wise stripos() only
        // folds ASCII case, so e.g. 'ÄBC' would not start with 'äb'.
        if (\mb_stripos($str, $search, 0, 'UTF-8') === 0) {
            // Drop as many characters as $search has and prepend the replacement.
            return $replacement . \mb_substr($str, \mb_strlen($search, 'UTF-8'), null, 'UTF-8');
        }

        return $str;
    }
6797
6798
    /**
6799
     * Replaces $search from the ending of string with $replacement.
6800
     *
6801
     * @param string $str         <p>The input string.</p>
6802
     * @param string $search      <p>The string to search for.</p>
6803
     * @param string $replacement <p>The replacement.</p>
6804
     *
6805
     * @psalm-pure
6806
     *
6807
     * @return string
6808
     *                <p>The string after the replacement.</p>
6809
     */
6810 17
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Historic contract kept as-is: an empty $search appends the replacement.
        // This also covers the empty-$str cases ('' . $replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = (int) \mb_strlen($str, 'UTF-8');
        $search_length = (int) \mb_strlen($search, 'UTF-8');

        // A needle longer than the haystack can never be its ending. Bailing out
        // here also avoids the out-of-range negative offset that made the former
        // stripos() call throw a ValueError on PHP 8.
        if ($search_length > $str_length) {
            return $str;
        }

        $head_length = $str_length - $search_length;

        // Compare only the trailing characters, case-insensitively and on UTF-8
        // characters rather than bytes.
        $tail = \mb_substr($str, $head_length, null, 'UTF-8');
        if (\mb_stripos($tail, $search, 0, 'UTF-8') === 0) {
            return \mb_substr($str, 0, $head_length, 'UTF-8') . $replacement;
        }

        return $str;
    }
6832
6833
    /**
6834
     * Check if the string starts with the given substring, case-insensitive.
6835
     *
6836
     * EXAMPLE: <code>
6837
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6838
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6839
     * </code>
6840
     *
6841
     * @param string $haystack <p>The string to search in.</p>
6842
     * @param string $needle   <p>The substring to search for.</p>
6843
     *
6844
     * @psalm-pure
6845
     *
6846
     * @return bool
6847
     */
6848 13
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // An empty needle counts as a prefix of every string, even the empty one.
        if ($needle === '') {
            return true;
        }

        // An empty haystack cannot start with a non-empty needle; otherwise the
        // needle has to be found at index 0.
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6860
6861
    /**
6862
     * Returns true if the string begins with any of $substrings, false otherwise.
6863
     *
6864
     * - case-insensitive
6865
     *
6866
     * @param string $str        <p>The input string.</p>
6867
     * @param array  $substrings <p>Substrings to look for.</p>
6868
     *
6869
     * @psalm-pure
6870
     *
6871
     * @return bool
6872
     *              <p>Whether or not $str starts with $substring.</p>
6873
     */
6874 5
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // Without input or without candidates there is nothing to match.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // The first matching candidate decides the result.
        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6892
6893
    /**
6894
     * Gets the substring after the first occurrence of a separator.
6895
     *
6896
     * @param string $str       <p>The input string.</p>
6897
     * @param string $separator <p>The string separator.</p>
6898
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6899
     *
6900
     * @psalm-pure
6901
     *
6902
     * @return string
6903
     */
6904 1
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for slicing below, so the
        // offset and the slice are measured in the same character units.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Skip past the separator itself and return the rest of the string.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6932
6933
    /**
6934
     * Gets the substring after the last occurrence of a separator.
6935
     *
6936
     * @param string $str       <p>The input string.</p>
6937
     * @param string $separator <p>The string separator.</p>
6938
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6939
     *
6940
     * @psalm-pure
6941
     *
6942
     * @return string
6943
     */
6944 1
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for slicing below, so the
        // offset and the slice are measured in the same character units.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Skip past the separator itself and return the rest of the string.
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6972
6973
    /**
6974
     * Gets the substring before the first occurrence of a separator.
6975
     *
6976
     * @param string $str       <p>The input string.</p>
6977
     * @param string $separator <p>The string separator.</p>
6978
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6979
     *
6980
     * @psalm-pure
6981
     *
6982
     * @return string
6983
     */
6984 1
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for slicing below, so the
        // offset and the slice are measured in the same character units.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Everything in front of the separator is the result.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7004
7005
    /**
7006
     * Gets the substring before the last occurrence of a separator.
7007
     *
7008
     * @param string $str       <p>The input string.</p>
7009
     * @param string $separator <p>The string separator.</p>
7010
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7011
     *
7012
     * @psalm-pure
7013
     *
7014
     * @return string
7015
     */
7016 1
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path through the class helpers for any other charset.
            $position = self::strripos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // Default encoding: call mbstring directly.
        $position = \mb_strripos($str, $separator);

        // No separator found means there is no "before" part.
        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
7041
7042
    /**
7043
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
7044
     *
7045
     * @param string $str           <p>The input string.</p>
7046
     * @param string $needle        <p>The string to look for.</p>
7047
     * @param bool   $before_needle [optional] <p>Default: false</p>
7048
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7049
     *
7050
     * @psalm-pure
7051
     *
7052
     * @return string
7053
     */
7054 2
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Empty input or empty needle: nothing to extract.
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        // self::stristr() signals "no match" with false; this method reports it as ''.
        return $found === false ? '' : $found;
    }
7080
7081
    /**
7082
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
7083
     *
7084
     * @param string $str           <p>The input string.</p>
7085
     * @param string $needle        <p>The string to look for.</p>
7086
     * @param bool   $before_needle [optional] <p>Default: false</p>
7087
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7088
     *
7089
     * @psalm-pure
7090
     *
7091
     * @return string
7092
     */
7093 1
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Empty input or empty needle: nothing to extract.
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        // self::strrichr() signals "no match" with false; this method reports it as ''.
        return $found === false ? '' : $found;
    }
7119
7120
    /**
7121
     * Returns the last $n characters of the string.
7122
     *
7123
     * @param string $str      <p>The input string.</p>
7124
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
7125
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7126
     *
7127
     * @psalm-pure
7128
     *
7129
     * @return string
7130
     */
7131 12
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // A non-positive count or an empty input yields an empty result.
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path: normalize the charset name, then slice from the end.
            $charset = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $charset);
        }

        // A negative start makes mb_substr() count from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
7148
7149
    /**
7150
     * Limit the number of characters in a string.
7151
     *
7152
     * @param string $str        <p>The input string.</p>
7153
     * @param int    $length     [optional] <p>Default: 100</p>
7154
     * @param string $str_add_on [optional] <p>Default: …</p>
7155
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
7156
     *
7157
     * @psalm-pure
7158
     *
7159
     * @return string
7160
     */
7161 2
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: return the input untouched (no add-on).
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on. The value is clamped at 0 because a
            // negative length would make mb_substr() cut from the END of the
            // string and return far more than $length characters.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return (string) \mb_substr($str, 0, $keep) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // Short enough already: return the input untouched (no add-on).
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Same clamping as in the UTF-8 branch above.
        $keep = \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7188
7189
    /**
7190
     * Limit the number of characters in a string, but also after the next word.
7191
     *
7192
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
7193
     *
7194
     * @param string $str        <p>The input string.</p>
7195
     * @param int    $length     [optional] <p>Default: 100</p>
7196
     * @param string $str_add_on [optional] <p>Default: …</p>
7197
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
7198
     *
7199
     * @psalm-pure
7200
     *
7201
     * @return string
7202
     */
7203 6
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        // Index of the last character that still fits into the limit.
        $last_index = $length - 1;

        if ($encoding === 'UTF-8') {
            // Already within the limit: return the input unchanged.
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit ends on a space: cut right before it.
            if (\mb_substr($str, $last_index, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $last_index)) . $str_add_on;
            }

            $clipped = \mb_substr($str, 0, $length);

            // Drop the last (possibly cut-off) word by removing the final
            // space-separated piece.
            $whole_words = \implode(' ', \explode(' ', $clipped, -1));

            // No complete word left: fall back to a hard cut.
            if ($whole_words === '') {
                return ((string) \mb_substr($clipped, 0, $last_index)) . $str_add_on;
            }

            return $whole_words . $str_add_on;
        }

        // Already within the limit: return the input unchanged.
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit ends on a space: cut right before it.
        if (self::substr($str, $last_index, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $last_index, $encoding)) . $str_add_on;
        }

        $clipped = self::substr($str, 0, $length, $encoding);
        if ($clipped === false) {
            // Slicing failed: only the add-on is returned.
            return $str_add_on;
        }

        // Drop the last (possibly cut-off) word by removing the final
        // space-separated piece.
        $whole_words = \implode(' ', \explode(' ', $clipped, -1));

        // No complete word left: fall back to a hard cut.
        if ($whole_words === '') {
            return ((string) self::substr($clipped, 0, $last_index, $encoding)) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
7257
7258
    /**
7259
     * Returns the longest common prefix between the $str1 and $str2.
7260
     *
7261
     * @param string $str1     <p>The input string.</p>
7262
     * @param string $str2     <p>Second string for comparison.</p>
7263
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7264
     *
7265
     * @psalm-pure
7266
     *
7267
     * @return string
7268
     */
7269 10
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // The prefix cannot be longer than the shorter input.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            $pos = 0;
            while ($pos < $limit) {
                $current = \mb_substr($str1, $pos, 1);

                // Stop at the first position where the characters differ.
                if ($current === false || $current !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $current;
                ++$pos;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The prefix cannot be longer than the shorter input.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $pos = 0;
        while ($pos < $limit) {
            $current = self::substr($str1, $pos, 1, $encoding);

            // Stop at the first position where the characters differ.
            if ($current === false || $current !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $current;
            ++$pos;
        }

        return $prefix;
    }
7321
7322
    /**
7323
     * Returns the longest common substring between the $str1 and $str2.
7324
     * In the case of ties, it returns that which occurs first.
7325
     *
7326
     * @param string $str1
7327
     * @param string $str2     <p>Second string for comparison.</p>
7328
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7329
     *
7330
     * @psalm-pure
7331
     *
7332
     * @return string
7333
     *                <p>The longest common substring of $str1 and $str2.</p>
7334
     */
7335 11
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Decided after normalization, because an alias may normalize to 'UTF-8'.
        $use_mb = $encoding === 'UTF-8';

        // Extract every character exactly once, instead of re-slicing both
        // strings inside the inner loop (n + m slices instead of 2 * n * m).
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[$i] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based end position of that match inside $str1

        // Each row only depends on the previous one, so two rows replace the
        // full (n + 1) x (m + 1) table.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    // Extend the common run that ended on the previous characters.
                    $run = $previous_row[$j - 1] + 1;

                    // Strictly greater: on ties the earliest match is kept.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7412
7413
    /**
7414
     * Returns the longest common suffix between the $str1 and $str2.
7415
     *
7416
     * @param string $str1
7417
     * @param string $str2     <p>Second string for comparison.</p>
7418
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7419
     *
7420
     * @psalm-pure
7421
     *
7422
     * @return string
7423
     */
7424 10
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // The suffix cannot be longer than the shorter input.
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the last character of both strings.
            for ($back = 1; $back <= $limit; ++$back) {
                $current = \mb_substr($str1, -$back, 1);

                // Stop at the first position where the characters differ.
                if ($current === false || $current !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The suffix cannot be longer than the shorter input.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Walk backwards from the last character of both strings.
        for ($back = 1; $back <= $limit; ++$back) {
            $current = self::substr($str1, -$back, 1, $encoding);

            // Stop at the first position where the characters differ.
            if ($current === false || $current !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }
7479
7480
    /**
     * Checks whether $str matches the given regular expression.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (UTF-8) modifier, so it must be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the pattern matched, <strong>false</strong> if it did not
     *              match or could not be evaluated.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
7495
7496
    /**
     * Tells whether a character exists at the given offset. A negative offset
     * counts from the last character of the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        if ($offset < 0) {
            // -1 addresses the last character, -$char_count the first one.
            return \abs($offset) <= $char_count;
        }

        return $offset < $char_count;
    }
7521
7522
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, i.e. the same one that
        // "char_at()" receives below, so bounds check and lookup agree.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7554
7555
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string; the input is returned unchanged if it is already
     *                at least $pad_length characters long, or if $pad_length is 0 or $pad_string is empty.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // "UTF-8" takes the native mbstring fast path (with the encoding passed
        // explicitly, so mb_internal_encoding() is irrelevant); everything else
        // goes through the class' own helpers.
        $use_mb = $encoding === 'UTF-8';
        if ($use_mb) {
            $str_length = (int) \mb_strlen($str, 'UTF-8');
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        // Number of padding characters that are still missing.
        $diff = $pad_length - $str_length;
        if ($diff <= 0) {
            return $str;
        }

        $pad_unit_length = $use_mb
            ? (int) \mb_strlen($pad_string, 'UTF-8')
            : (int) self::strlen($pad_string, $encoding);
        if ($pad_unit_length < 1) {
            // The length could not be determined: never divide by zero below.
            $pad_unit_length = 1;
        }

        /**
         * Builds exactly $length characters of padding by repeating $pad_string
         * just often enough and cutting off the surplus.
         *
         * @param int $length
         *
         * @psalm-pure
         *
         * @return string
         */
        $build_padding = static function (int $length) use (
            $pad_string,
            $pad_unit_length,
            $encoding,
            $use_mb
        ): string {
            if ($length <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($length / $pad_unit_length));

            if ($use_mb) {
                return (string) \mb_substr($repeated, 0, $length, 'UTF-8');
            }

            return (string) self::substr($repeated, 0, $length, $encoding);
        };

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                return $build_padding($diff) . $str;

            case \STR_PAD_BOTH:
                // With an odd difference the extra character goes to the right.
                $left_length = (int) \floor($diff / 2);

                return $build_padding($left_length) . $str . $build_padding($diff - $left_length);

            case \STR_PAD_RIGHT:
            default:
                return $str . $build_padding($diff);
        }
    }
7722
7723
    /**
     * Pads both sides of the string until it is $length characters long.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7751
7752
    /**
     * Pads the beginning of the string until it is $length characters long.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7780
7781
    /**
     * Pads the end of the string until it is $length characters long.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7809
7810
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" first and the filtered
     * result is what gets repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7839
7840
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replaces every occurrence of the search value(s) with the replacement value(s).
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The needle to look for; pass an array to search for
     *                                 several needles at once.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The value that replaces each found needle; pass an array
     *                                 to use a separate replacement per needle.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The haystack: a string, or an array of strings, to search
     *                                 and replace in.
     *                                 </p>
     *                                 <p>
     *                                 For an array the replacement is performed on every entry
     *                                 and an array is returned.
     *                                 </p>
     * @param int|null        $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @psalm-param TStrReplaceSubject $subject
     * @psalm-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7897
7898
    /**
     * Replaces $search with $replacement if, and only if, $str starts with $search.
     *
     * An empty $search never matches a prefix: in that case $replacement is
     * appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // Also covers an empty $str: the result is then just $replacement
            // (or '' when that is empty as well).
            return $str . $replacement;
        }

        // Byte-wise prefix comparison; never matches when $str is shorter than $search.
        $search_length = \strlen($search);
        if (\strncmp($str, $search, $search_length) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $search_length);
    }
7935
7936
    /**
     * Replaces $search with $replacement if, and only if, $str ends with $search.
     *
     * An empty $search never matches a suffix: in that case $replacement is
     * simply appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // Also covers an empty $str: the result is then just $replacement
            // (or '' when that is empty as well).
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A needle longer than the haystack can never be its suffix. Checking
        // this first keeps the negative offset below inside the string; an
        // out-of-range offset is an error (ValueError on PHP 8).
        if (
            $search_length <= \strlen($str)
            &&
            \substr_compare($str, $search, -$search_length) === 0
        ) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
7973
7974
    /**
     * Replaces only the first occurrence of $search in $subject with $replace.
     * The subject is returned unchanged when $search is not found.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);
        if ($first_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, (int) self::strlen($search));
    }
8008
8009
    /**
     * Replaces only the last occurrence of $search in $subject with $replace.
     * The subject is returned unchanged when $search is not found.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);
        if ($last_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, (int) self::strlen($search));
    }
8042
8043
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Nothing to shuffle; this also avoids building the index list range(0, -1).
        if ($str === '') {
            return '';
        }

        // "UTF-8" takes the native mbstring fast path (with the encoding passed
        // explicitly, so mb_internal_encoding() is irrelevant); everything else
        // goes through the class' own helpers.
        $use_mb = $encoding === 'UTF-8';
        if ($use_mb) {
            $length = (int) \mb_strlen($str, 'UTF-8');
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // Shuffle the character positions, then rebuild the string in that order.
        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        $shuffled_str = '';
        foreach ($indexes as $index) {
            $tmp_sub_str = $use_mb
                ? \mb_substr($str, $index, 1, 'UTF-8')
                : self::substr($str, $index, 1, $encoding);

            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
8092
8093
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if a non-negative <i>end</i> is
     *                      not greater than <i>start</i>. Otherwise whatever the underlying substr
     *                      function returns, which may be <b>FALSE</b> on failure.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // "UTF-8" takes the native mbstring fast path (with the encoding passed
        // explicitly, so mb_internal_encoding() is irrelevant); everything else
        // goes through the class' own helpers.
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end !== null && $end >= 0) {
            // An absolute end at or before the start selects nothing.
            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        } else {
            // Open-ended or relative-to-the-end slices need the string length.
            $str_length = $use_mb
                ? (int) \mb_strlen($str, 'UTF-8')
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $str_length : $str_length + $end - $start;
        }

        if ($use_mb) {
            return \mb_substr($str, $start, $length, 'UTF-8');
        }

        return self::substr($str, $start, $length, $encoding);
    }
8144
8145
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become underscores, every number character and
     * every uppercase letter starts a new "_"-separated segment (uppercase
     * letters are lowercased), and repeated or surrounding underscores are
     * removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Any number character or uppercase letter. There is no "|" inside
            // the character class: it would be a literal pipe there, not an
            // alternation, and would wrongly match "|" characters in the input.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // A plain decimal digit gets separated on both sides.
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    // Explicit encoding: do not depend on mb_internal_encoding().
                    return '_' . \mb_strtolower($match, 'UTF-8');
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // Once all whitespace has been turned into "_", no separate whitespace
        // trimming pass is needed; surrounding "_" are trimmed below.
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            '_',
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8214
8215
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // Flipping twice collapses duplicate values.
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8245
8246
    /**
     * Splits every entry of the input array into an array of Unicode characters
     * (or chunks of $length characters) by calling "UTF8::str_split()" on it.
     * The keys of the input array are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        /** @var string[][] $chunks */
        $chunks = [];

        foreach ($input as $key => $value) {
            $chunks[$key] = self::str_split($value, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        return $chunks;
    }
8284
8285
    /**
     * Convert a string to an array of unicode characters.
     *
     * Strategy, in order of preference:
     * 1. "mb_str_split" (PHP >= 7.4, mbstring available and allowed),
     * 2. "mb_substr" per character for short inputs (<= 127 chars) or a
     *    PCRE "/./us" match for longer ones,
     * 3. PCRE "/./us" when only UTF-8 aware PCRE is available,
     * 4. a byte-wise UTF-8 decoder as last fallback (bytes that do not form a
     *    valid 1-4 byte sequence are skipped).
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input;
     *                  an empty array for an empty input or a $length <= 0.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        // init
        $input = (string) $input;

        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if (
            $try_to_use_mb_functions
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            if (Bootup::is_php('7.4')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $return = \mb_str_split($input, $length);
                if ($return !== false) {
                    // "mb_str_split" already honours $length, nothing left to do
                    return $return;
                }
            }

            $i_max = \mb_strlen($input);
            if ($i_max <= 127) {
                // short input: one "mb_substr" call per character
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($input, $i, 1);
                }
            } else {
                // longer input: let PCRE collect all characters in one pass
                $return_array = [];
                \preg_match_all('/./us', $input, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $input, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences by hand

            $ret = [];
            $len = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($input[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte character
                    $ret[] = $input[$i];
                } elseif (
                    isset($input[$i + 1])
                    &&
                    ($input[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($input[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $input[$i] . $input[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($input[$i + 2])
                    &&
                    ($input[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($input[$i + 3])
                    &&
                    ($input[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // Re-group the single characters into chunks of $length chars.
            // The callback takes its argument by value: "array_map" passes
            // values, and a by-reference parameter triggers a warning on PHP 8.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
8445
8446
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * NOTE: with mbstring support the pattern is handed to "mb_split" as a
     * regular expression; in the fallback branch it is passed through
     * "preg_quote" and therefore matched literally.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; an empty array if $limit is 0
     *                  or the split itself failed.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split" reports a failure via false; mirror the behaviour
            // of the "preg_split" branch below instead of returning false.
            if ($parts === false) {
                return [];
            }

            // a positive limit truncates the result, the remainder is dropped
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // Ask for one extra element, so that the unsplit remainder ends up
        // in the last element and can be dropped afterwards.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
8510
8511
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle always
     * matches, an empty $haystack only matches an empty $needle.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Compare only the leading bytes instead of searching the whole
        // haystack for the needle.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
8538
8539
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty list of $substrings never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8570
8571
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_*" functions with their default encoding
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own "substr" wrapper, like the other
        // "str_substr_*_separator" methods do for non UTF-8 encodings.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8612
8613
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the last separator, via the native or the wrapped function
        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            $tail_start = $position + (int) \mb_strlen($separator);

            return (string) \mb_substr($str, $tail_start);
        }

        $tail_start = $position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tail_start, null, $encoding);
    }
8657
8658
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the first separator, via the native or the wrapped function
        $position = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
8703
8704
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding is normalized only for the final "substr" call
            return (string) self::substr(
                $str,
                0,
                $position,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $position);
    }
8748
8749
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself; with
     * it, the result ends right before the needle. An empty string is
     * returned if $str or $needle is empty or the needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strstr($str, $needle, $before_needle, $encoding);
        } else {
            // "false" is the default of the third argument, so the flag can
            // be passed through unconditionally
            $found = \mb_strstr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
8795
8796
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the last needle; with
     * it, the result ends right before the last needle. An empty string is
     * returned if $str or $needle is empty or the needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strrchr($str, $needle, $before_needle, $encoding);
        } else {
            // "false" is the default of the third argument, so the flag can
            // be passed through unconditionally
            $found = \mb_strrchr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
8842
8843
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>$str with $substring placed in front of and behind it.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8858
8859
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that contains neither whitespace
     * nor one of the $word_define_chars. Words are upper-cased on the first
     * character and lower-cased on the rest; words listed in $ignore are
     * returned untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used if no language specific
        // or length preserving case mapping was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare explicitly instead of relying on truthiness: the string
        // '0' is a valid separator char but evaluates to false.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8956
8957
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * The input is trimmed, lower-cased as a whole if it contains no
     * lowercase character at all, and then run through five regex passes:
     * the main word-by-word substitution, followed by the exceptions for
     * small words at the start / end of a (sub-)title and inside hyphenated
     * compounds.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments for the words that stay lower-case inside a title
        $default_small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        // the caller's ignore-list is treated exactly like the built-in small words
        $small_words_rx = \implode(
            '|',
            $ignore === [] ? $default_small_words : \array_merge($default_small_words, $ignore)
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Callback: keep the first group and capitalize the second one.
         *
         * @param string[] $groups
         *
         * @psalm-pure
         *
         * @return string
         */
        $keep_first_capitalize_second = static function (array $groups) use ($encoding): string {
            return $groups[1] . self::ucfirst($groups[2], $encoding);
        };

        /**
         * Callback: capitalize the first group.
         *
         * @param string[] $groups
         *
         * @psalm-pure
         *
         * @return string
         */
        $capitalize_first = static function (array $groups) use ($encoding): string {
            return self::ucfirst($groups[1], $encoding);
        };

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($encoding): string {
                if ($groups[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $groups[2];
                } elseif ($groups[3]) {
                    // lower-case small words
                    $word = self::strtolower($groups[3], $encoding);
                } elseif ($groups[4]) {
                    // capitalize word w/o internal caps
                    $word = self::ucfirst($groups[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $groups[5];
                }

                // leading and trailing underscores are preserved as they are
                return $groups[1] . $word . $groups[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $keep_first_capitalize_second,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $capitalize_first,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $capitalize_first,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        return (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $keep_first_capitalize_second,
            $str
        );
    }
9148
9149
    /**
     * Get a binary representation of a specific string.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * INFO: The bytes of the string are read as one big number, so leading zero
     * bits are not part of the result (e.g. 'a' => '1100001') and an empty
     * string results in '0'.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        /** @noinspection OffsetOperationsInspection */
        $hex = (string) $value[1];

        // Convert nibble by nibble: "base_convert()" works with int / float
        // internally and silently loses precision for longer inputs.
        $binary = '';
        $hex_length = \strlen($hex);
        for ($i = 0; $i < $hex_length; ++$i) {
            $binary .= \str_pad(\decbin((int) \hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
        }

        // Keep the output format of "base_convert()": no leading zeros, but at least one digit.
        $binary = \ltrim($binary, '0');

        return $binary === '' ? '0' : $binary;
    }
9172
9173
    /**
     * Split a string into an array of lines.
     *
     * INFO: A run of one or two consecutive "\r" / "\n" characters is handled as one line break.
     *
     * @param string   $str                 <p>The input string.</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "there is nothing to split" (empty input or a failed split)
        $fallback = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        // only hand over to the filter helper if some filtering was requested
        $needs_filtering = $remove_empty_values || $remove_short_values !== null;
        if (!$needs_filtering) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
9213
9214
    /**
     * Convert a string into an array of words.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * INFO: The string is split with "PREG_SPLIT_DELIM_CAPTURE", so the result
     * contains the words and also the text between the words.
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // a "word" is a run of word-chars, optionally joined by a dash or an apostrophe
        $word_class = self::rxClass($char_list, '\pL');
        $pattern = "/({$word_class}+(?:[\p{Pd}’']{$word_class}+)*)/u";

        $parts = \preg_split($pattern, $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $remove_empty_values ? [] : [''];
        }

        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        // make sure that every remaining value is a string
        return \array_map(
            static function ($part): string {
                return (string) $part;
            },
            self::reduce_string_array($parts, $remove_empty_values, $remove_short_values)
        );
    }
9265
9266
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are passed through to "UTF8::to_ascii()" without any change.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9287
9288
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: If there is no room left for the string itself (the desired length
     * is smaller than the length of $substring, or negative), only $substring
     * is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // nothing to do, the string is already short enough
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // A negative length would be counted from the end of the string by
            // "mb_substr()" and could return more than requested, so clamp it.
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, \max(0, $length)) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // nothing to do, the string is already short enough
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same clamping as in the UTF-8 branch
        return (
               (string) self::substr(
                   $str,
                   0,
                   \max(0, $length),
                   $encoding
               )
               ) . $substring;
    }
9348
9349
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: Only $substring is returned if the input is empty, or if there is
     * no room left for the string itself.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // reserve the room for the substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return '';
            }

            // first space at (or directly before) the cut position
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space !== $length) {
                // the cut is (maybe) within a word: go back to the last space before it
                $last_space = self::strrpos($truncated, ' ', 0, $encoding);

                $cut_at_last_space = $last_space !== false;
                if (!$cut_at_last_space) {
                    $cut_at_last_space = $next_space !== false && !$ignore_do_not_split_words_for_one_word;
                }

                if ($cut_at_last_space) {
                    $truncated = (string) self::substr($truncated, 0, (int) $last_space, $encoding);
                }
            }

            return $truncated . $substring;
        }

        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve the room for the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = \mb_substr($str, 0, $length);
        if ($truncated === false) {
            return '';
        }

        // first space at (or directly before) the cut position
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space !== $length) {
            // the cut is (maybe) within a word: go back to the last space before it
            $last_space = \mb_strrpos($truncated, ' ', 0);

            $cut_at_last_space = $last_space !== false;
            if (!$cut_at_last_space) {
                $cut_at_last_space = $next_space !== false && !$ignore_do_not_split_words_for_one_word;
            }

            if ($cut_at_last_space) {
                $truncated = (string) \mb_substr($truncated, 0, (int) $last_space);
            }
        }

        return $truncated . $substring;
    }
9455
9456
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: This is "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
9473
9474
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: The string is camelized via "UTF8::str_camelize()" first, and the
     * first character is upper-cased via "UTF8::ucfirst()" afterwards.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9501
9502
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are passed through to "UTF8::ucfirst()" without any change.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
9533
9534
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words itself (format 1 and 2).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the words are on the odd indexes, the text between the words is on the even indexes
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        if ($format !== 1 && $format !== 2) {
            return (int) (($parts_count - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $parts_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: use the offset (in chars) of every word as key
        $position = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $parts_count; $index += 2) {
            $words[$position] = $parts[$index];
            $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
9591
9592
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
     * case-folded via UTF8::strtocasefold() before they are compared.
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_str1, $folded_str2);
    }
9634
9635
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are passed through to "UTF8::strstr()" without any change.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9666
9667
    /**
     * Case-sensitive string comparison.
     *
     * INFO: Both strings are normalized (NFD) before they are compared. If a
     * string can't be normalized (e.g. invalid UTF-8), it is compared as it is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical bytes are always equal, no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        // "Normalizer::normalize()" returns false on failure, and passing false
        // to "\strcmp()" is a TypeError in strict mode, so fall back to the input.
        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if ($normalized_str1 === false) {
            $normalized_str1 = $str1;
        }

        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if ($normalized_str2 === false) {
            $normalized_str2 = $str2;
        }

        return \strcmp($normalized_str1, $normalized_str2);
    }
9693
9694
    /**
     * Find length of initial segment not matching mask.
     *
     * INFO: If $offset and / or $length is used, only that part of the string
     * is searched, and the result is relative to the start of that part.
     *
     * @param string   $str       <p>The input string.</p>
     * @param string   $char_list <p>The chars to search for.</p>
     * @param int      $offset    <p>Start of the part that is searched.</p>
     * @param int|null $length    <p>Length of the part that is searched, or null for "until the end".</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of chars before the first char from $char_list,<br>
     *             or the length of the searched string if no such char is found.</p>
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask the whole string is the "initial segment"
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $part = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $part = \mb_substr($str, $offset, $length);
            } else {
                $part = \mb_substr($str, $offset);
            }

            if ($part === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $part;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything before the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9757
9758
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are passed through to "UTF8::stristr()" without any change.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9789
9790
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * INFO: Every value is cast to an integer, converted into a numeric HTML
     * entity and decoded via UTF8::html_entity_decode() afterwards.
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @psalm-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single code point is handled like a list with one element
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9823
9824
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys (the BOM strings) are needed; iterate by value so that
        // no reference into the shared static array is created.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            if (\strpos($str, $bom_string) === 0) {
                return true;
            }
        }

        return false;
    }
9848
9849
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // optional: clean the string via UTF8::clean() before the tags are stripped
        $input = $clean_utf8 ? self::clean($str) : $str;

        // do not pass "null" to the native function, call it without the argument instead
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9893
9894
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without any whitespace.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        // the cast converts the "null" (regex error) into an empty string
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9915
9916
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $intl_is_usable = $encoding === 'UTF-8'
                          &&
                          $offset >= 0
                          &&
                          self::$SUPPORT['intl'] === true;
        if ($intl_is_usable) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        return self::strpos(
            self::strtocasefold($haystack, true, false, $encoding, null, false),
            self::strtocasefold($needle, true, false, $encoding, null, false),
            $offset,
            $encoding
        );
    }
9997
9998
    /**
9999
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
10000
     *
10001
     * EXAMPLE: <code>
10002
     * $str = 'iñtërnâtiônàlizætiøn';
10003
     * $search = 'NÂT';
10004
     *
10005
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
10006
     * UTF8::stristr($str, $search, true); // 'iñtër'
10007
     * </code>
10008
     *
10009
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
10010
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
10011
     * @param bool   $before_needle [optional] <p>
10012
     *                              If <b>TRUE</b>, it returns the part of the
10013
     *                              haystack before the first occurrence of the needle (excluding the needle).
10014
     *                              </p>
10015
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10016
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10017
     *
10018
     * @psalm-pure
10019
     *
10020
     * @return false|string
10021
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
10022
     */
10023 12
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare against '' explicitly:
        // a loose "!$needle" check would also treat the valid needle "0" as empty
        // and wrongly return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // Capture (non-greedy) everything in front of the first case-insensitive match.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Skip as many characters as were captured in front of the needle.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
10095
10096
    /**
10097
     * Get the string length, not the byte-length!
10098
     *
10099
     * INFO: use UTF8::strwidth() for the char-length
10100
     *
10101
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
10102
     *
10103
     * @see http://php.net/manual/en/function.mb-strlen.php
10104
     *
10105
     * @param string $str        <p>The string being checked for length.</p>
10106
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10107
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10108
     *
10109
     * @psalm-pure
10110
     *
10111
     * @return false|int
10112
     *                   <p>
10113
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
10114
     *                   $encoding.
10115
     *                   (One multi-byte character counted as +1).
10116
     *                   <br>
10117
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
10118
     *                   chars.
10119
     *                   </p>
10120
     */
10121 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, every other name gets normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8' ? @\mb_strlen($str) : @\mb_strlen($str, $encoding);
        }

        //
        // 2) binary || ascii only: the byte count is returned directly
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        // Warn when neither mbstring nor iconv is flagged as available for a non UTF-8 encoding.
        $has_converter = self::$SUPPORT['mbstring'] !== false || self::$SUPPORT['iconv'] !== false;
        if ($encoding !== 'UTF-8' && !$has_converter) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // 4) intl - INFO: "grapheme_strlen()" can't handle other encodings than UTF-8
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        //
        // 5) ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // 6) vanilla php: count every match of "." in unicode + dot-all mode
        //

        \preg_match_all('/./us', $str, $chars);
        $length = \count($chars[0]);

        // no match at all for a non-empty string is reported as a failure
        return $length === 0 ? false : $length;
    }
10226
10227
    /**
10228
     * Get string length in byte.
10229
     *
10230
     * @param string $str
10231
     *
10232
     * @psalm-pure
10233
     *
10234
     * @return int
10235
     */
10236 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // Without function overloading the native strlen() is used.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strlen($str, 'CP850');
    }
10249
10250
    /**
10251
     * Case-insensitive string comparisons using a "natural order" algorithm.
10252
     *
10253
     * INFO: natural order version of UTF8::strcasecmp()
10254
     *
10255
     * EXAMPLES: <code>
10256
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
10257
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10258
     *
10259
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10260
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10261
     * </code>
10262
     *
10263
     * @param string $str1     <p>The first string.</p>
10264
     * @param string $str2     <p>The second string.</p>
10265
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10266
     *
10267
     * @psalm-pure
10268
     *
10269
     * @return int
10270
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
10271
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
10272
     *             <strong>0</strong> if they are equal
10273
     */
10274 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical settings, then delegate to the
        // natural-order comparison.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
10281
10282
    /**
10283
     * String comparisons using a "natural order" algorithm
10284
     *
10285
     * INFO: natural order version of UTF8::strcmp()
10286
     *
10287
     * EXAMPLES: <code>
10288
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
10289
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10290
     *
10291
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10292
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10293
     * </code>
10294
     *
10295
     * @see http://php.net/manual/en/function.strnatcmp.php
10296
     *
10297
     * @param string $str1 <p>The first string.</p>
10298
     * @param string $str2 <p>The second string.</p>
10299
     *
10300
     * @psalm-pure
10301
     *
10302
     * @return int
10303
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
10304
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
10305
     *             <strong>0</strong> if they are equal
10306
     */
10307 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            $left = (string) self::strtonatfold($str1);
            $right = (string) self::strtonatfold($str2);

            return \strnatcmp($left, $right);
        }

        // identical strings: nothing to fold or compare
        return 0;
    }
10318
10319
    /**
10320
     * Case-insensitive string comparison of the first n characters.
10321
     *
10322
     * EXAMPLE: <code>
10323
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
10324
     * </code>
10325
     *
10326
     * @see http://php.net/manual/en/function.strncasecmp.php
10327
     *
10328
     * @param string $str1     <p>The first string.</p>
10329
     * @param string $str2     <p>The second string.</p>
10330
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
10331
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10332
     *
10333
     * @psalm-pure
10334
     *
10335
     * @return int
10336
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10337
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10338
     *             <strong>0</strong> if they are equal
10339
     */
10340 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands, then compare their first $len characters.
        // The encoding is forwarded to strncmp() as well, so that the strings are
        // cut with the same charset that was used for the case folding.
        // NOTE(review): assumes strtocasefold() returns the string in $encoding - confirm.
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            $encoding
        );
    }
10352
10353
    /**
10354
     * String comparison of the first n characters.
10355
     *
10356
     * EXAMPLE: <code>
10357
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
10358
     * </code>
10359
     *
10360
     * @see http://php.net/manual/en/function.strncmp.php
10361
     *
10362
     * @param string $str1     <p>The first string.</p>
10363
     * @param string $str2     <p>The second string.</p>
10364
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
10365
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10366
     *
10367
     * @psalm-pure
10368
     *
10369
     * @return int
10370
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10371
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10372
     *             <strong>0</strong> if they are equal
10373
     */
10374 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are used as-is, every other name gets normalized first.
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // other encodings: cut via the encoding-aware helper
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: cut directly via "mb_substr()"
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
10394
10395
    /**
10396
     * Search a string for any of a set of characters.
10397
     *
10398
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
10399
     *
10400
     * @see http://php.net/manual/en/function.strpbrk.php
10401
     *
10402
     * @param string $haystack  <p>The string where char_list is looked for.</p>
10403
     * @param string $char_list <p>This parameter is case-sensitive.</p>
10404
     *
10405
     * @psalm-pure
10406
     *
10407
     * @return false|string
10408
     *                      <p>The string starting from the character found, or false if it is not found.</p>
10409
     */
10410 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Build a pattern from the character list and look for the first hit.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Byte position of the matched text; everything from there on is returned.
        $byte_pos = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $byte_pos);
    }
10422
10423
    /**
10424
     * Find the position of the first occurrence of a substring in a string.
10425
     *
10426
     * INFO: use UTF8::strpos_in_byte() for the byte-length
10427
     *
10428
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
10429
     *
10430
     * @see http://php.net/manual/en/function.mb-strpos.php
10431
     *
10432
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10433
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
10434
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
10435
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10436
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10437
     *
10438
     * @psalm-pure
10439
     *
10440
     * @return false|int
10441
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
10442
     *                   string.<br> If needle is not found it returns false.
10443
     */
10444 53
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle,
        // so an integer is converted via self::chr() first
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // the guard above guarantees a non-negative offset at this point
            $return_tmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the string. Translate it into the
        // equivalent non-negative offset first, so that the position returned below is
        // relative to the start of the original haystack and not to the cut-off part.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the remaining haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10582
10583
    /**
10584
     * Find the position of the first occurrence of a substring in a string.
10585
     *
10586
     * @param string $haystack <p>
10587
     *                         The string being checked.
10588
     *                         </p>
10589
     * @param string $needle   <p>
10590
     *                         The string to find in haystack.
10591
     *                         </p>
10592
     * @param int    $offset   [optional] <p>
10593
     *                         The search offset. If it is not specified, 0 is used.
10594
     *                         </p>
10595
     *
10596
     * @psalm-pure
10597
     *
10598
     * @return false|int
10599
     *                   <p>The numeric position of the first occurrence of needle in the
10600
     *                   haystack string. If needle is not found, it returns false.</p>
10601
     */
10602 2
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // Without function overloading the native strpos() is used.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
10615
10616
    /**
10617
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10618
     *
10619
     * @param string $haystack <p>
10620
     *                         The string being checked.
10621
     *                         </p>
10622
     * @param string $needle   <p>
10623
     *                         The string to find in haystack.
10624
     *                         </p>
10625
     * @param int    $offset   [optional] <p>
10626
     *                         The search offset. If it is not specified, 0 is used.
10627
     *                         </p>
10628
     *
10629
     * @psalm-pure
10630
     *
10631
     * @return false|int
10632
     *                   <p>The numeric position of the first occurrence of needle in the
10633
     *                   haystack string. If needle is not found, it returns false.</p>
10634
     */
10635 2
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // Without function overloading the native stripos() is used.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_stripos($haystack, $needle, $offset, 'CP850');
    }
10648
10649
    /**
10650
     * Find the last occurrence of a character in a string within another.
10651
     *
10652
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10653
     *
10654
     * @see http://php.net/manual/en/function.mb-strrchr.php
10655
     *
10656
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10657
     * @param string $needle        <p>The string to find in haystack</p>
10658
     * @param bool   $before_needle [optional] <p>
10659
     *                              Determines which portion of haystack
10660
     *                              this function returns.
10661
     *                              If set to true, it returns all of haystack
10662
     *                              from the beginning to the last occurrence of needle.
10663
     *                              If set to false, it returns all of haystack
10664
     *                              from the last occurrence of needle to the end,
10665
     *                              </p>
10666
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10667
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10668
     *
10669
     * @psalm-pure
10670
     *
10671
     * @return false|string
10672
     *                      <p>The portion of haystack or false if needle is not found.</p>
10673
     */
10674 2
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "UTF-8" and "CP850" are used as-is, every other name gets normalized first.
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only (native strrchr() has no "before needle" mode)
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv or vanilla php: both search for the first character of the needle only
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        // either the part in front of the last occurrence, or everything from it to the end
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10779
10780
    /**
10781
     * Reverses characters order in the string.
10782
     *
10783
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10784
     *
10785
     * @param string $str      <p>The input string.</p>
10786
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10787
     *
10788
     * @psalm-pure
10789
     *
10790
     * @return string
10791
     *                <p>The string with characters in the reverse sequence.</p>
10792
     */
10793 10
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $result = '';

        // encoded here, decoded again right before returning
        $str = self::emoji_encode($str, true);

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk from the last character to the first one
            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10837
10838
    /**
10839
     * Find the last occurrence of a character in a string within another, case-insensitive.
10840
     *
10841
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10842
     *
10843
     * @see http://php.net/manual/en/function.mb-strrichr.php
10844
     *
10845
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10846
     * @param string $needle        <p>The string to find in haystack.</p>
10847
     * @param bool   $before_needle [optional] <p>
10848
     *                              Determines which portion of haystack
10849
     *                              this function returns.
10850
     *                              If set to true, it returns all of haystack
10851
     *                              from the beginning to the last occurrence of needle.
10852
     *                              If set to false, it returns all of haystack
10853
     *                              from the last occurrence of needle to the end,
10854
     *                              </p>
10855
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10856
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10857
     *
10858
     * @psalm-pure
10859
     *
10860
     * @return false|string
10861
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10862
     */
10863 3
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // nothing to search in or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters precede the needle in the haystack
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: only the first character of the needle is searched
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // either the part before the match, or the match plus everything after it
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10918
10919
    /**
10920
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
10921
     *
10922
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
10923
     *
10924
     * @param string     $haystack   <p>The string to look in.</p>
10925
     * @param int|string $needle     <p>The string to look for.</p>
10926
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
10927
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10928
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10929
     *
10930
     * @psalm-pure
10931
     *
10932
     * @return false|int
10933
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
10934
     *                   string.<br>If needle is not found, it returns false.</p>
10935
     */
10936 14
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle,
        // so a non-negative integer is converted via self::chr() first
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }

        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_byte_encoding) {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($encoding === 'UTF-8' && $offset >= 0) {
            if (self::$SUPPORT['intl'] === true) {
                $intl_pos = \grapheme_strripos($haystack, $needle, $offset);
                if ($intl_pos !== false) {
                    return $intl_pos;
                }
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
11036
11037
    /**
11038
     * Finds position of last occurrence of a string within another, case-insensitive.
11039
     *
11040
     * @param string $haystack <p>
11041
     *                         The string from which to get the position of the last occurrence
11042
     *                         of needle.
11043
     *                         </p>
11044
     * @param string $needle   <p>
11045
     *                         The string to find in haystack.
11046
     *                         </p>
11047
     * @param int    $offset   [optional] <p>
11048
     *                         The position in haystack
11049
     *                         to start searching.
11050
     *                         </p>
11051
     *
11052
     * @psalm-pure
11053
     *
11054
     * @return false|int
11055
     *                   <p>Return the numeric position of the last occurrence of needle in the
11056
     *                   haystack string, or false if needle is not found.</p>
11057
     */
11058 2
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // without the function overload the native function is used directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
11071
11072
    /**
11073
     * Find the position of the last occurrence of a substring in a string.
11074
     *
11075
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
11076
     *
11077
     * @see http://php.net/manual/en/function.mb-strrpos.php
11078
     *
11079
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
11080
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
11081
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
11082
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
11083
     *                               the end of the string.
11084
     *                               </p>
11085
     * @param string     $encoding   [optional] <p>Set the charset.</p>
11086
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11087
     *
11088
     * @psalm-pure
11089
     *
11090
     * @return false|int
11091
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
11092
     *                   string.<br>If needle is not found, it returns false.</p>
11093
     */
11094 35
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle,
        // so a non-negative integer is converted via self::chr() first
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }

        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_byte_encoding) {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strrpos()" can't handle a negative offset or other encodings
        if ($offset >= 0 && $encoding === 'UTF-8') {
            if (self::$SUPPORT['intl'] === true) {
                $intl_pos = \grapheme_strrpos($haystack, $needle, $offset);
                if ($intl_pos !== false) {
                    return $intl_pos;
                }
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // apply the offset by cutting the haystack:
        // positive => drop the leading characters (and add the offset back at the end),
        // negative => drop the trailing characters (nothing to add back)
        if ($offset !== 0) {
            if ($offset > 0) {
                $haystack_part = self::substr($haystack, $offset);
            } else {
                $haystack_part = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            $haystack = $haystack_part === false ? '' : (string) $haystack_part;
        }

        // byte position of the last match ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // ... converted into a character position by measuring everything in front of it
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
11217
11218
    /**
11219
     * Find the position of the last occurrence of a substring in a string.
11220
     *
11221
     * @param string $haystack <p>
11222
     *                         The string being checked, for the last occurrence
11223
     *                         of needle.
11224
     *                         </p>
11225
     * @param string $needle   <p>
11226
     *                         The string to find in haystack.
11227
     *                         </p>
11228
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
11229
     *                         the string. Negative values will stop searching at an arbitrary point
11230
     *                         prior to the end of the string.
11231
     *                         </p>
11232
     *
11233
     * @psalm-pure
11234
     *
11235
     * @return false|int
11236
     *                   <p>The numeric position of the last occurrence of needle in the
11237
     *                   haystack string. If needle is not found, it returns false.</p>
11238
     */
11239 2
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // without the function overload the native function is used directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
11252
11253
    /**
11254
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
11255
     * mask.
11256
     *
11257
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
11258
     *
11259
     * @param string   $str      <p>The input string.</p>
11260
     * @param string   $mask     <p>The mask of chars</p>
11261
     * @param int      $offset   [optional]
11262
     * @param int|null $length   [optional]
11263
     * @param string   $encoding [optional] <p>Set the charset.</p>
11264
     *
11265
     * @psalm-pure
11266
     *
11267
     * @return false|int
11268
     */
11269 10
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // restrict the input to the requested window first
        $needs_slice = $offset !== 0 || $length !== null;
        if ($needs_slice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the leading run of mask characters and measure it
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11300
11301
    /**
11302
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
11303
     *
11304
     * EXAMPLE: <code>
11305
     * $str = 'iñtërnâtiônàlizætiøn';
11306
     * $search = 'nât';
11307
     *
11308
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
11309
     * UTF8::strstr($str, $search, true); // 'iñtër'
11310
     * </code>
11311
     *
11312
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
11313
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
11314
     * @param bool   $before_needle [optional] <p>
11315
     *                              If <b>TRUE</b>, strstr() returns the part of the
11316
     *                              haystack before the first occurrence of the needle (excluding the needle).
11317
     *                              </p>
11318
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
11319
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
11320
     *
11321
     * @psalm-pure
11322
     *
11323
     * @return false|string
11324
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
11325
     */
11326 3
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters precede the needle in the haystack
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_byte_encoding) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8') {
            if (self::$SUPPORT['intl'] === true) {
                $intl_result = \grapheme_strstr($haystack, $needle, $before_needle);
                if ($intl_result !== false) {
                    return $intl_result;
                }
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture (lazily) everything in front of the first match
        //

        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $parts);

        if (!isset($parts[1])) {
            return false;
        }

        $prefix = $parts[1];

        // either the captured prefix itself, or the haystack without that prefix
        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
11422
11423
    /**
11424
     * Finds first occurrence of a string within another.
11425
     *
11426
     * @param string $haystack      <p>
11427
     *                              The string from which to get the first occurrence
11428
     *                              of needle.
11429
     *                              </p>
11430
     * @param string $needle        <p>
11431
     *                              The string to find in haystack.
11432
     *                              </p>
11433
     * @param bool   $before_needle [optional] <p>
11434
     *                              Determines which portion of haystack
11435
     *                              this function returns.
11436
     *                              If set to true, it returns all of haystack
11437
     *                              from the beginning to the first occurrence of needle.
11438
     *                              If set to false, it returns all of haystack
11439
     *                              from the first occurrence of needle to the end,
11440
     *                              </p>
11441
     *
11442
     * @psalm-pure
11443
     *
11444
     * @return false|string
11445
     *                      <p>The portion of haystack,
11446
     *                      or false if needle is not found.</p>
11447
     */
11448 2
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // without the function overload the native function is used directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
11464
11465
    /**
11466
     * Unicode transformation for case-less matching.
11467
     *
11468
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
11469
     *
11470
     * @see http://unicode.org/reports/tr21/tr21-5.html
11471
     *
11472
     * @param string      $str        <p>The input string.</p>
11473
     * @param bool        $full       [optional] <p>
11474
     *                                <b>true</b>, replace full case folding chars (default)<br>
11475
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
11476
     *                                </p>
11477
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11478
     * @param string      $encoding   [optional] <p>Set the charset.</p>
11479
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11480
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
11481
     *                                is for some languages better ...</p>
11482
     *
11483
     * @psalm-pure
11484
     *
11485
     * @return string
11486
     */
11487 32
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // optional clean-up of the input, requested by the caller
            $str = self::clean($str);
        }

        // apply the case-folding replacements before the final case conversion
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // plain UTF-8 without a language hint: convert directly via "mb_*"
        $is_plain_utf8 = $lang === null && $encoding === 'UTF-8';
        if ($is_plain_utf8) {
            return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // otherwise delegate to the encoding- and language-aware methods
        // (the input was already cleaned above if requested, hence "false")
        return $lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11521
11522
    /**
11523
     * Make a string lowercase.
11524
     *
11525
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
11526
     *
11527
     * @see http://php.net/manual/en/function.mb-strtolower.php
11528
     *
11529
     * @param string      $str                           <p>The string being lowercased.</p>
11530
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11531
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11532
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11533
     *                                                   tr</p>
11534
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11535
     *                                                   -> ß</p>
11536
     *
11537
     * @psalm-pure
11538
     *
11539
     * @return string
11540
     *                <p>String with all alphabetic characters converted to lowercase.</p>
11541
     */
11542 73
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // optional clean-up of the input, requested by the caller
            $str = self::clean($str);
        }

        if ($try_to_keep_the_string_length) {
            // hack for old php version or for the polyfill ...
            $str = self::fixStrCaseHelper($str, true);
        }

        // plain UTF-8 without a language hint: nothing else to prepare
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // a language was requested, but it can only be handled via intl
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // load the transliterator list once and keep it for later calls
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language-independent transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11603
11604
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * A <strong>E_USER_WARNING</strong> is triggered when "$lang" is given but
     * cannot be honoured: either the intl transliterator list does not contain
     * "<lang>-Upper" (then "Any-Upper" is used) or intl is not available at all
     * (then the plain "mb_strtoupper()" fallback is used).
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased (it is cast to string).</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // sanitize the input before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: no language rules and the default encoding
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of transliterator ids is loaded lazily
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11685
11686
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * Behaviour by argument shape:
     * - "$to" is given: "$from" and "$to" are turned into lists (strings are
     *   split via UTF8::str_split()), the longer list is truncated to the length
     *   of the shorter one and the resulting pairs are applied via "\strtr()".
     * - "$to" is '' and "$from" is a string: every occurrence of "$from" is removed.
     * - "$to" is '' and "$from" is an array: "$from" is used as the replace-pairs map.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "\array_combine()" reports failure by returning false
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // both lists must have the same size: cut the longer one
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep "$from" untouched until we know the combination worked,
            // so that the error message can still show the real input
            $pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        if (\is_string($from)) {
            // only reachable with "$to === ''": remove every occurrence of "$from"
            return \str_replace($from, /** @scrutinizer ignore-type */ $to, $str);
        }

        return \strtr($str, $from);
    }
11753
11754
    /**
     * Return the width of a string.
     *
     * With mbstring the result comes from "mb_strwidth()". Without it, every
     * character from the wide ranges listed in the regex below counts as 2
     * and every remaining character (as counted by UTF8::strlen()) counts as 1.
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and remember how many were removed
        $wide_char_count = 0;
        $narrow_part = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_char_count);

        // wide characters count twice, everything else once
        return ($wide_char_count << 1) + (int) self::strlen($narrow_part);
    }
11813
11814
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * Strategy, in this order: "mb_substr()" (UTF-8 + mbstring), "\substr()"
     * (CP850 / ASCII), "grapheme_substr()" (intl), "iconv_substr()" (iconv),
     * "\substr()" for ASCII-only input and finally a pure PHP split + slice.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p>
     *                      <p>In the fallback path an empty string is returned if <i>offset</i> is
     *                      beyond the end of <i>str</i>, and <b>FALSE</b> is returned if the length
     *                      of <i>str</i> cannot be determined.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // nothing to extract
        if ($length === 0 || $str === '') {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // no offset and no limit: the whole string is the result
        if ($length === null && !$offset) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring (UTF-8 only)
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['mbstring'] === true) {
            return $length === null
                ? \mb_substr($str, $offset)
                : \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return $length === null
                ? \substr($str, $offset)
                : \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $char_count = 0;
        if ($offset || $length === null) {
            $char_count = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($char_count === false) {
            return false;
        }

        if (
            // the offset points right behind the last character and no length was given
            ($length === null && $offset === $char_count)
            ||
            // the offset is beyond the end of the string
            ($offset && $offset > $char_count)
        ) {
            return '';
        }

        if ($length === null) {
            $length = (int) $char_count;
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $intl_result = \grapheme_substr($str, $offset, $length);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback via iconv
        //

        // NOTE(review): "iconv_substr()" is called without "$encoding", so it uses
        // iconv's default charset - confirm that this is intended for non UTF-8 input
        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $iconv_result = \iconv_substr($str, $offset, $length);
            if ($iconv_result !== false) {
                return $iconv_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split into characters, cut out the requested slice and join it again
        $chars = self::str_split($str);

        return \implode('', \array_slice($chars, $offset, $length));
    }
11973
11974
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * If an offset and / or a length is given, "$str1" is reduced to that portion
     * first and "$str2" is then cut to the same number of characters, so that
     * both operands have a comparable length.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. With NULL, "$str1" is used
     *                                     from the offset up to its end.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure "$str1" in its own encoding, otherwise "$str2" is cut at the wrong position
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12035
12036
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the haystack or the needle is
     *                   empty or if the length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an empty search window cannot contain anything
        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = ($encoding === 'UTF-8');

        if ($clean_utf8) {
            // sanitize both operands before searching
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            $haystack = $is_utf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // without mbstring: count the literal matches of the needle via regex
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12125
12126
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * If "mbstring.func_overload" is active, the haystack is cut to the requested
     * window first and the occurrences are counted via "mb_substr_count()" with
     * the 8-bit charset "CP850"; otherwise the native "substr_count()" is used.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string (0 if one of them is empty).</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            if ($offset || $length !== null) {
                if ($length === null) {
                    $haystack_length = self::strlen($haystack);
                    if ($haystack_length === false) {
                        return false;
                    }
                    $length = (int) $haystack_length;
                }

                if (
                    $length !== 0
                    &&
                    $offset !== 0
                    &&
                    ($length + $offset) <= 0
                    &&
                    !Bootup::is_php('7.1') // output from "substr_count()" have changed in PHP 7.1
                ) {
                    return false;
                }

                /** @var false|string $window - needed for PhpStan (stubs error) */
                $window = \substr($haystack, $offset, $length);
                $haystack = $window === false ? '' : (string) $window;
            }

            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
12207
12208
    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $case_sensitive to false.
     *
     * For the case-insensitive comparison both strings are converted first:
     * via "mb_strtoupper()" for UTF-8 and via UTF8::strtocasefold() for every
     * other encoding.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences (0 if one of the strings is empty).</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
12255
12256
    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the needle has
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12288
12289
    /**
     * Get part of a string process in bytes.
     *
     * If "mbstring.func_overload" is active, "mb_substr()" with the 8-bit charset
     * "CP850" is used, otherwise the native "\substr()".
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string, NULL means
     *                         "up to the end of the string".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by the underlying
     *                      "substr" function.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // omit the length instead of passing a magic "max" value,
        // so the result is never capped at 2147483647 bytes
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
12322
12323
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
12355
12356
    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the needle has
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12388
12389
    /**
12390
     * Replace text within a portion of a string.
12391
     *
12392
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
12393
     *
12394
     * source: https://gist.github.com/stemar/8287074
12395
     *
12396
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
12397
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
12398
     * @param int|int[]       $offset      <p>
12399
     *                                     If start is positive, the replacing will begin at the start'th offset
12400
     *                                     into string.
12401
     *                                     <br><br>
12402
     *                                     If start is negative, the replacing will begin at the start'th character
12403
     *                                     from the end of string.
12404
     *                                     </p>
12405
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
12406
     *                                     portion of string which is to be replaced. If it is negative, it
12407
     *                                     represents the number of characters from the end of string at which to
12408
     *                                     stop replacing. If it is not given, then it will default to strlen(
12409
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
12410
     *                                     length is zero then this function will have the effect of inserting
12411
     *                                     replacement into string at the given start offset.</p>
12412
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
12413
     *
12414
     * @psalm-pure
12415
     *
12416
     * @return string|string[]
12417
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
12418
     */
12419 10
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // Nothing to process: "array_pad()" never truncates, so without this guard the
            // padded one-element helper arrays below would produce a bogus one-element result.
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement)) {
                // missing entries are filled with null by "array_map()" and end up as ''
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets are treated as 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);

                // Like the native "substr_replace()": if there are fewer offsets than strings,
                // 0 is used for the rest (a null offset would corrupt the result below).
                $offset = \array_pad($offset, $num, 0);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                // for arrays, an omitted length means "insert" (length 0)
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                // missing entries are filled with null by "array_map()", which the
                // scalar code path interprets as "until the end of the string"
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call (the closure forwards "$encoding", which a plain callable would drop)
            return \array_map(
                /**
                 * @param mixed $str_tmp
                 * @param mixed $replacement_tmp
                 * @param mixed $offset_tmp
                 * @param mixed $length_tmp
                 *
                 * @psalm-pure
                 *
                 * @return string|string[]
                 */
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only the first entry is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // normalize the offset into the range [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // normalize the length: negative === "stop that many chars before the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters and splice the arrays
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12544
12545
    /**
12546
     * Removes a suffix ($needle) from the end of the string ($haystack).
12547
     *
12548
     * EXAMPLE: <code>
12549
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12550
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
12551
     * </code>
12552
     *
12553
     * @param string $haystack <p>The string to search in.</p>
12554
     * @param string $needle   <p>The substring to search for.</p>
12555
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
12556
     *
12557
     * @psalm-pure
12558
     *
12559
     * @return string
12560
     *                <p>Return the sub-string.</p>
12561
     */
12562 2
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // An empty haystack has nothing to strip, and an empty needle strips nothing.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // Byte-wise, case-sensitive suffix comparison: no match means no change.
        $byte_suffix = \substr($haystack, -\strlen($needle));
        if ($byte_suffix !== $needle) {
            return $haystack;
        }

        // Fast path for UTF-8: use the native "mb_" functions directly.
        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        // Any other encoding goes through the encoding-aware helpers of this class.
        $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep_length, $encoding);
    }
12594
12595
    /**
12596
     * Returns a case swapped version of the string.
12597
     *
12598
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
12599
     *
12600
     * @param string $str        <p>The input string.</p>
12601
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12602
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12603
     *
12604
     * @psalm-pure
12605
     *
12606
     * @return string
12607
     *                <p>Each character's case swapped.</p>
12608
     */
12609 6
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Fast path: "lower ^ upper ^ original" flips the case byte by byte. This is only
        // meaningful while all three strings have the same byte length, because the
        // string-XOR operator silently truncates its result to the shortest operand.
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // Slow path (UTF-8 only): a case mapping changed the byte length (e.g. "ı" -> "I"),
        // so swap the case character by character instead of byte by byte.
        if ($encoding === 'UTF-8') {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars !== false) {
                $swapped = '';
                foreach ($chars as $char) {
                    $char_lower = \mb_strtolower($char);
                    // a char that already is its own lowercase form gets uppercased, anything else lowercased
                    $swapped .= $char === $char_lower ? \mb_strtoupper($char) : $char_lower;
                }

                return $swapped;
            }
        }

        // Legacy behavior for other encodings and for input that cannot be split as UTF-8.
        return (string) ($lower ^ $upper ^ $str);
    }
12627
12628
    /**
12629
     * Checks whether symfony-polyfills are used.
12630
     *
12631
     * @psalm-pure
12632
     *
12633
     * @return bool
12634
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
12635
     */
12636
    public static function symfony_polyfill_used(): bool
    {
        // A polyfill is in use when the function exists although the extension itself is not loaded.
        $mbstring_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_polyfilled || $iconv_polyfilled;
    }
12653
12654
    /**
12655
     * @param string $str
12656
     * @param int    $tab_length
12657
     *
12658
     * @psalm-pure
12659
     *
12660
     * @return string
12661
     */
12662 6
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // Every tab character is replaced by "$tab_length" space characters.
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12674
12675
    /**
12676
     * Converts the first character of each word in the string to uppercase
12677
     * and all other chars to lowercase.
12678
     *
12679
     * @param string      $str                           <p>The input string.</p>
12680
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12681
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12682
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12683
     *                                                   tr</p>
12684
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12685
     *                                                   -> ß</p>
12686
     *
12687
     * @psalm-pure
12688
     *
12689
     * @return string
12690
     *                <p>A string with all characters of $str being title-cased.</p>
12691
     */
12692 5
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // Language-specific or length-preserving conversions are delegated to "str_titleize()".
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // Plain conversion: UTF-8 relies on the internal default encoding of mbstring.
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12729
12730
    /**
12731
     * alias for "UTF8::to_ascii()"
12732
     *
12733
     * @param string $str
12734
     * @param string $subst_chr
12735
     * @param bool   $strict
12736
     *
12737
     * @psalm-pure
12738
     *
12739
     * @return string
12740
     *
12741
     * @see        UTF8::to_ascii()
12742
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
12743
     */
12744 7
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        // Deprecated camelCase alias: all arguments are forwarded unchanged.
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12751
12752
    /**
12753
     * alias for "UTF8::to_iso8859()"
12754
     *
12755
     * @param string|string[] $str
12756
     *
12757
     * @psalm-pure
12758
     *
12759
     * @return string|string[]
12760
     *
12761
     * @see        UTF8::to_iso8859()
12762
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12763
     */
12764 2
    public static function toIso8859($str)
    {
        // Deprecated camelCase alias: the input is forwarded unchanged.
        $converted = self::to_iso8859($str);

        return $converted;
    }
12768
12769
    /**
12770
     * alias for "UTF8::to_latin1()"
12771
     *
12772
     * @param string|string[] $str
12773
     *
12774
     * @psalm-pure
12775
     *
12776
     * @return string|string[]
12777
     *
12778
     * @see        UTF8::to_iso8859()
12779
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12780
     */
12781 2
    public static function toLatin1($str)
    {
        // Deprecated alias: Latin-1 is ISO-8859-1, so this forwards to "to_iso8859()".
        $converted = self::to_iso8859($str);

        return $converted;
    }
12785
12786
    /**
12787
     * alias for "UTF8::to_utf8()"
12788
     *
12789
     * @param string|string[] $str
12790
     *
12791
     * @psalm-pure
12792
     *
12793
     * @return string|string[]
12794
     *
12795
     * @see        UTF8::to_utf8()
12796
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
12797
     */
12798 2
    public static function toUTF8($str)
    {
        // Deprecated camelCase alias: the input is forwarded unchanged.
        $converted = self::to_utf8($str);

        return $converted;
    }
12802
12803
    /**
12804
     * Convert a string into ASCII.
12805
     *
12806
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
12807
     *
12808
     * @param string $str     <p>The input string.</p>
12809
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
12810
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
12811
     *                        performance</p>
12812
     *
12813
     * @psalm-pure
12814
     *
12815
     * @return string
12816
     */
12817 37
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // The transliteration itself is implemented by the ASCII helper class.
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12824
12825
    /**
12826
     * @param bool|int|string $str
12827
     *
12828
     * @psalm-param bool|int|numeric-string $str
12829
     *
12830
     * @psalm-pure
12831
     *
12832
     * @return bool
12833
     */
12834 19
    public static function to_boolean($str): bool
    {
        // Work on the string representation of the input.
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // Try the exact value first and its lowercase form second.
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (isset($keywords[$candidate])) {
                return $keywords[$candidate];
            }
        }

        // Any other numeric string is true only if it is greater than zero.
        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        // Everything else: true if something truthy is left after trimming.
        return (bool) \trim($value);
    }
12870
12871
    /**
12872
     * Convert given string to safe filename (and keep string case).
12873
     *
12874
     * @param string $str
12875
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
12876
     *                                  simply replaced with hyphen.
12877
     * @param string $fallback_char
12878
     *
12879
     * @psalm-pure
12880
     *
12881
     * @return string
12882
     */
12883 1
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // The conversion itself is implemented by the ASCII helper class.
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12894
12895
    /**
12896
     * Convert a string into "ISO-8859"-encoding (Latin-1).
12897
     *
12898
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
12899
     *
12900
     * @param string|string[] $str
12901
     *
12902
     * @psalm-pure
12903
     *
12904
     * @return string|string[]
12905
     */
12906 8
    public static function to_iso8859($str)
    {
        // Arrays are converted entry by entry (recursively), the keys are kept.
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        // Only non-empty strings need to be decoded.
        return $input === '' ? '' : self::utf8_decode($input);
    }
12923
12924
    /**
12925
     * alias for "UTF8::to_iso8859()"
12926
     *
12927
     * @param string|string[] $str
12928
     *
12929
     * @psalm-pure
12930
     *
12931
     * @return string|string[]
12932
     *
12933
     * @see        UTF8::to_iso8859()
12934
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12935
     */
12936 2
    public static function to_latin1($str)
    {
        // Deprecated alias: Latin-1 is ISO-8859-1, so this forwards to "to_iso8859()".
        $converted = self::to_iso8859($str);

        return $converted;
    }
12940
12941
    /**
12942
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
12943
     *
12944
     * <ul>
12945
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
12946
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
12947
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
12948
     * case.</li>
12949
     * </ul>
12950
     *
12951
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
12952
     *
12953
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
12954
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
12955
     *
12956
     * @psalm-pure
12957
     *
12958
     * @return string|string[]
12959
     *                         <p>The UTF-8 encoded string</p>
12960
     *
12961
     * @template TToUtf8
12962
     * @psalm-param TToUtf8 $str
12963
     * @psalm-return TToUtf8
12964
     *
12965
     * @noinspection SuspiciousBinaryOperationInspection
12966
     */
12967 44
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        // Arrays are converted entry by entry, the keys are kept.
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
            }

            return $str;
        }

        /** @psalm-var TToUtf8 $converted */
        $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $converted;
    }
12982
12983
    /**
12984
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
12985
     *
12986
     * <ul>
12987
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
12988
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
12989
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
12990
     * case.</li>
12991
     * </ul>
12992
     *
12993
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
12994
     *
12995
     * @param string $str                        <p>Any string.</p>
12996
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
12997
     *
12998
     * @psalm-pure
12999
     *
13000
     * @return string
13001
     *                <p>The UTF-8 encoded string</p>
13002
     *
13003
     * @noinspection SuspiciousBinaryOperationInspection
13004
     */
13005 44
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $lead = $str[$pos];

            // 0x00-0x7F: plain 7-bit byte, it doesn't need conversion
            if ($lead < "\x80") {
                $result .= $lead;

                continue;
            }

            // How many continuation bytes would a UTF-8 sequence with this lead byte have?
            if ($lead < "\xC0") {
                $continuation_bytes = 0; // 0x80-0xBF: stray continuation byte, needs conversion
            } elseif ($lead <= "\xDF") {
                $continuation_bytes = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $continuation_bytes = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $continuation_bytes = 3; // looks like 4 bytes UTF8
            } else {
                $continuation_bytes = 0; // 0xF8-0xFF: doesn't look like UTF8, but should be converted
            }

            // Every expected continuation byte must be within 0x80-0xBF,
            // bytes beyond the end of the string count as "\x00" (=== invalid).
            $looks_like_utf8 = $continuation_bytes > 0;
            for ($step = 1; $looks_like_utf8 && $step <= $continuation_bytes; ++$step) {
                $next = $pos + $step >= $byte_count ? "\x00" : $str[$pos + $step];
                $looks_like_utf8 = $next >= "\x80" && $next <= "\xBF";
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its tail
                $result .= \substr($str, $pos, $continuation_bytes + 1);
                $pos += $continuation_bytes;
            } else {
                // not valid UTF8 - convert only this single byte
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups): string {
                if (isset($groups[3])) {
                    // single "\uXXXX" escape sequence
                    $code_point = (int) \hexdec($groups[3]);
                } else {
                    // surrogate pair, see: http://unicode.org/faq/utf_bom.html#utf16-4
                    $code_point = ((int) \hexdec($groups[1]) << 10)
                                  + (int) \hexdec($groups[2])
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | $code_point >> 6) . (string) self::chr(0x80 | $code_point & 0x3F);
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
13116
13117
    /**
13118
     * Returns the given string as an integer, or null if the string isn't numeric.
13119
     *
13120
     * @param string $str
13121
     *
13122
     * @psalm-pure
13123
     *
13124
     * @return int|null
13125
     *                  <p>null if the string isn't numeric</p>
13126
     */
13127 1
    public static function to_int(string $str)
    {
        // Only numeric strings are cast, everything else yields null.
        return \is_numeric($str) ? (int) $str : null;
    }
13135
13136
    /**
13137
     * Returns the given input as string, or null if the input isn't int|float|string
13138
     * and do not implement the "__toString()" method.
13139
     *
13140
     * @param float|int|object|string|null $input
13141
     *
13142
     * @psalm-pure
13143
     *
13144
     * @return string|null
13145
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
13146
     */
13147 1
    public static function to_string($input)
    {
        if ($input === null) {
            return null;
        }

        /** @var string $type - hack for psalm */
        $type = \gettype($input);

        // Scalars with a natural string representation are cast directly.
        if (\in_array($type, ['string', 'integer', 'float', 'double'], true)) {
            return (string) $input;
        }

        // Objects are only converted if they implement "__toString()".
        if ($type === 'object' && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // e.g. bool, array, resource or objects without "__toString()"
        return null;
    }
13181
13182
    /**
13183
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
13184
     *
13185
     * INFO: This is slower than "trim()"
13186
     *
13187
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
13188
     * but the check for ASCII (7-Bit) costs more time than we can save here.
13189
     *
13190
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
13191
     *
13192
     * @param string      $str   <p>The string to be trimmed</p>
13193
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
13194
     *
13195
     * @psalm-pure
13196
     *
13197
     * @return string
13198
     *                <p>The trimmed string.</p>
13199
     */
13200 57
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means "strip nothing" (same as the native "trim()").
        // Without this guard the pattern below would contain the character class "[]",
        // which is either invalid or matches unintended characters.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // no delimiter is passed, "mb_ereg_replace()" patterns have none
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: the pattern is handed over to "regex_replace()"
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13228
13229
    /**
     * Upper-cases the first character of a string and leaves the rest untouched.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input with its first character upper-cased.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // "mb_strtoupper()" is only used when neither a language nor the
        // keep-the-length option is requested; otherwise "self::strtoupper()" takes over
        $needs_special_casing = $lang !== null || $try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $head = (string) \mb_substr($str, 0, 1);
            $tail = (string) \mb_substr($str, 1);

            $head = $needs_special_casing
                ? self::strtoupper($head, $encoding, false, $lang, $try_to_keep_the_string_length)
                : \mb_strtoupper($head);

            return $head . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($needs_special_casing) {
            $head = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        } else {
            $head = \mb_strtoupper((string) \mb_substr($str, 0, 1, $encoding), $encoding);
        }

        return $head . $tail;
    }
13305
13306
    /**
     * Deprecated alias that forwards its three arguments to "UTF8::ucfirst()".
     *
     * @param string $str
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // the remaining "ucfirst()" parameters keep their defaults
        $upper_cased = self::ucfirst($str, $encoding, $clean_utf8);

        return $upper_cased;
    }
13327
13328
    /**
     * Uppercase for all words in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are copied to the result unchanged.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // compare against '' explicitly: a loose check would also discard the valid input "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the native "ucwords()" is only an option without a char-list and without
        // (non-empty) exceptions; strict comparison keeps a char-list of "0" intact
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if ($use_php_default_functions && ASCII::is_ascii($str)) {
            return \ucwords($str);
        }

        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // iterate by value: nothing is written back into the word list
        foreach (self::str_to_words($str, $char_list) as $word) {
            // skip empty entries only; the word "0" must survive
            if ($word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
13396
13397
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Repeat the decoding until the string no longer changes.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: without "&", "%", "+" or "\u" only "fix_simple_utf8()" is applied
        $has_marker = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_marker = true;

                break;
            }
        }

        if (!$has_marker) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // One pass always runs; with "$multi_decode" the passes repeat
        // until a pass leaves the string unchanged.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $str_compare !== $str);

        return $str;
    }
13472
13473
    /**
13474
     * Returns a static lookup table that maps the percent-encoded sequences "%20" to "%FF"
     * (upper-case hex) to the character each one is replaced with.
13475
     *
     * The method takes no input and only returns the literal array below.
     *
     * NOTE(review): '%80' maps to a backtick here, while Windows-1252 assigns the Euro sign
     * to 0x80 - confirm whether that is intentional.
     * NOTE(review): the values for '%7F', '%81', '%8D', '%8F', '%90', '%9D', '%A0' and '%AD'
     * render as empty strings in most viewers; they may contain non-printable characters,
     * so verify the raw bytes before editing those lines.
     *
13476
     * @psalm-pure
13477
     *
13478
     * @return string[]
     *                  <p>Percent-encoded sequence as key, replacement character as value.</p>
13479
     *
13480
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
13481
     */
13482 2
    public static function urldecode_fix_win1252_chars(): array
13483
    {
13484
        return [
            // %20 - %7E: printable ASCII
13485 2
            '%20' => ' ',
13486
            '%21' => '!',
13487
            '%22' => '"',
13488
            '%23' => '#',
13489
            '%24' => '$',
13490
            '%25' => '%',
13491
            '%26' => '&',
13492
            '%27' => "'",
13493
            '%28' => '(',
13494
            '%29' => ')',
13495
            '%2A' => '*',
13496
            '%2B' => '+',
13497
            '%2C' => ',',
13498
            '%2D' => '-',
13499
            '%2E' => '.',
13500
            '%2F' => '/',
13501
            '%30' => '0',
13502
            '%31' => '1',
13503
            '%32' => '2',
13504
            '%33' => '3',
13505
            '%34' => '4',
13506
            '%35' => '5',
13507
            '%36' => '6',
13508
            '%37' => '7',
13509
            '%38' => '8',
13510
            '%39' => '9',
13511
            '%3A' => ':',
13512
            '%3B' => ';',
13513
            '%3C' => '<',
13514
            '%3D' => '=',
13515
            '%3E' => '>',
13516
            '%3F' => '?',
13517
            '%40' => '@',
13518
            '%41' => 'A',
13519
            '%42' => 'B',
13520
            '%43' => 'C',
13521
            '%44' => 'D',
13522
            '%45' => 'E',
13523
            '%46' => 'F',
13524
            '%47' => 'G',
13525
            '%48' => 'H',
13526
            '%49' => 'I',
13527
            '%4A' => 'J',
13528
            '%4B' => 'K',
13529
            '%4C' => 'L',
13530
            '%4D' => 'M',
13531
            '%4E' => 'N',
13532
            '%4F' => 'O',
13533
            '%50' => 'P',
13534
            '%51' => 'Q',
13535
            '%52' => 'R',
13536
            '%53' => 'S',
13537
            '%54' => 'T',
13538
            '%55' => 'U',
13539
            '%56' => 'V',
13540
            '%57' => 'W',
13541
            '%58' => 'X',
13542
            '%59' => 'Y',
13543
            '%5A' => 'Z',
13544
            '%5B' => '[',
13545
            '%5C' => '\\',
13546
            '%5D' => ']',
13547
            '%5E' => '^',
13548
            '%5F' => '_',
13549
            '%60' => '`',
13550
            '%61' => 'a',
13551
            '%62' => 'b',
13552
            '%63' => 'c',
13553
            '%64' => 'd',
13554
            '%65' => 'e',
13555
            '%66' => 'f',
13556
            '%67' => 'g',
13557
            '%68' => 'h',
13558
            '%69' => 'i',
13559
            '%6A' => 'j',
13560
            '%6B' => 'k',
13561
            '%6C' => 'l',
13562
            '%6D' => 'm',
13563
            '%6E' => 'n',
13564
            '%6F' => 'o',
13565
            '%70' => 'p',
13566
            '%71' => 'q',
13567
            '%72' => 'r',
13568
            '%73' => 's',
13569
            '%74' => 't',
13570
            '%75' => 'u',
13571
            '%76' => 'v',
13572
            '%77' => 'w',
13573
            '%78' => 'x',
13574
            '%79' => 'y',
13575
            '%7A' => 'z',
13576
            '%7B' => '{',
13577
            '%7C' => '|',
13578
            '%7D' => '}',
13579
            '%7E' => '~',
13580
            '%7F' => '',
            // %80 - %9F: the range where Windows-1252 differs from ISO-8859-1
13581
            '%80' => '`',
13582
            '%81' => '',
13583
            '%82' => '‚',
13584
            '%83' => 'ƒ',
13585
            '%84' => '„',
13586
            '%85' => '…',
13587
            '%86' => '†',
13588
            '%87' => '‡',
13589
            '%88' => 'ˆ',
13590
            '%89' => '‰',
13591
            '%8A' => 'Š',
13592
            '%8B' => '‹',
13593
            '%8C' => 'Œ',
13594
            '%8D' => '',
13595
            '%8E' => 'Ž',
13596
            '%8F' => '',
13597
            '%90' => '',
13598
            '%91' => '‘',
13599
            '%92' => '’',
13600
            '%93' => '“',
13601
            '%94' => '”',
13602
            '%95' => '•',
13603
            '%96' => '–',
13604
            '%97' => '—',
13605
            '%98' => '˜',
13606
            '%99' => '™',
13607
            '%9A' => 'š',
13608
            '%9B' => '›',
13609
            '%9C' => 'œ',
13610
            '%9D' => '',
13611
            '%9E' => 'ž',
13612
            '%9F' => 'Ÿ',
            // %A0 - %FF: Latin-1 supplement characters
13613
            '%A0' => '',
13614
            '%A1' => '¡',
13615
            '%A2' => '¢',
13616
            '%A3' => '£',
13617
            '%A4' => '¤',
13618
            '%A5' => '¥',
13619
            '%A6' => '¦',
13620
            '%A7' => '§',
13621
            '%A8' => '¨',
13622
            '%A9' => '©',
13623
            '%AA' => 'ª',
13624
            '%AB' => '«',
13625
            '%AC' => '¬',
13626
            '%AD' => '',
13627
            '%AE' => '®',
13628
            '%AF' => '¯',
13629
            '%B0' => '°',
13630
            '%B1' => '±',
13631
            '%B2' => '²',
13632
            '%B3' => '³',
13633
            '%B4' => '´',
13634
            '%B5' => 'µ',
13635
            '%B6' => '¶',
13636
            '%B7' => '·',
13637
            '%B8' => '¸',
13638
            '%B9' => '¹',
13639
            '%BA' => 'º',
13640
            '%BB' => '»',
13641
            '%BC' => '¼',
13642
            '%BD' => '½',
13643
            '%BE' => '¾',
13644
            '%BF' => '¿',
13645
            '%C0' => 'À',
13646
            '%C1' => 'Á',
13647
            '%C2' => 'Â',
13648
            '%C3' => 'Ã',
13649
            '%C4' => 'Ä',
13650
            '%C5' => 'Å',
13651
            '%C6' => 'Æ',
13652
            '%C7' => 'Ç',
13653
            '%C8' => 'È',
13654
            '%C9' => 'É',
13655
            '%CA' => 'Ê',
13656
            '%CB' => 'Ë',
13657
            '%CC' => 'Ì',
13658
            '%CD' => 'Í',
13659
            '%CE' => 'Î',
13660
            '%CF' => 'Ï',
13661
            '%D0' => 'Ð',
13662
            '%D1' => 'Ñ',
13663
            '%D2' => 'Ò',
13664
            '%D3' => 'Ó',
13665
            '%D4' => 'Ô',
13666
            '%D5' => 'Õ',
13667
            '%D6' => 'Ö',
13668
            '%D7' => '×',
13669
            '%D8' => 'Ø',
13670
            '%D9' => 'Ù',
13671
            '%DA' => 'Ú',
13672
            '%DB' => 'Û',
13673
            '%DC' => 'Ü',
13674
            '%DD' => 'Ý',
13675
            '%DE' => 'Þ',
13676
            '%DF' => 'ß',
13677
            '%E0' => 'à',
13678
            '%E1' => 'á',
13679
            '%E2' => 'â',
13680
            '%E3' => 'ã',
13681
            '%E4' => 'ä',
13682
            '%E5' => 'å',
13683
            '%E6' => 'æ',
13684
            '%E7' => 'ç',
13685
            '%E8' => 'è',
13686
            '%E9' => 'é',
13687
            '%EA' => 'ê',
13688
            '%EB' => 'ë',
13689
            '%EC' => 'ì',
13690
            '%ED' => 'í',
13691
            '%EE' => 'î',
13692
            '%EF' => 'ï',
13693
            '%F0' => 'ð',
13694
            '%F1' => 'ñ',
13695
            '%F2' => 'ò',
13696
            '%F3' => 'ó',
13697
            '%F4' => 'ô',
13698
            '%F5' => 'õ',
13699
            '%F6' => 'ö',
13700
            '%F7' => '÷',
13701
            '%F8' => 'ø',
13702
            '%F9' => 'ù',
13703
            '%FA' => 'ú',
13704
            '%FB' => 'û',
13705
            '%FC' => 'ü',
13706
            '%FD' => 'ý',
13707
            '%FE' => 'þ',
13708
            '%FF' => 'ÿ',
13709
        ];
13710
    }
13711
13712
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 become one byte; every other
     * multi-byte sequence is replaced by "?". All remaining bytes are copied unchanged.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>Return the input unchanged if the decoded string does not have
     *                                fewer characters (per "UTF8::strlen()") than the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // lazy-load the lookup tables on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // kept for the "$keep_utf8_chars" comparison at the end
        $original = $str;
        $byte_count = \strlen($str);
        $replacement = '?';

        // The string is rewritten in place: "$read" walks the input bytes,
        // "$write" is the offset of the next output byte (never ahead of "$read").
        $write = 0;
        /** @noinspection ForeachInvariantsInspection */
        for ($read = 0; $read < $byte_count; ++$read, ++$write) {
            $high_nibble = $str[$read] & "\xF0";

            if ($high_nibble === "\xC0" || $high_nibble === "\xD0") {
                // lead byte of a 2-byte sequence: combine it with the following byte
                $code_point = (self::$ORD[$str[$read] & "\x1F"] << 6) | self::$ORD[$str[++$read] & "\x3F"];
                $str[$write] = $code_point < 256 ? self::$CHR[$code_point] : $replacement;
            } elseif ($high_nibble === "\xE0") {
                // 3-byte sequence: one "?" and skip the two following bytes
                $str[$write] = $replacement;
                $read += 2;
            } elseif ($high_nibble === "\xF0") {
                // 4-byte sequence: one "?" and skip the three following bytes
                $str[$write] = $replacement;
                $read += 3;
            } else {
                $str[$write] = $str[$read];
            }
        }

        /** @var false|string $decoded - needed for PhpStan (stubs error) */
        $decoded = \substr($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        if (!$keep_utf8_chars) {
            return $decoded;
        }

        return (int) self::strlen($decoded) >= (int) self::strlen($original)
            ? $original
            : $decoded;
    }
13788
13789
    /**
     * Encodes an ISO-8859-1 string to UTF-8 via the global "utf8_encode()" function.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The encoded string, or an empty string if the encoder returned false.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
13817
13818
    /**
     * Deprecated alias that forwards the string to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
13833
13834
    /**
     * Returns the class-level whitespace table ("self::$WHITESPACE_TABLE").
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
13849
13850
    /**
     * Limit the number of words in a string.
     *
     * Words are runs of non-whitespace characters. If the string has more than "$limit"
     * words, it is cut after the last allowed word, the trailing whitespace is removed
     * and "$str_add_on" is appended.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string for an empty input or a limit below 1, the unchanged input if it
     *                fits into the limit (or could not be matched), otherwise the shortened string.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        // The match is anchored at the start, so it is either a real prefix
        // or the whole string - in the latter case nothing was cut off.
        if (!isset($matches[0]) || $matches[0] === $str) {
            return $str;
        }

        // Strip the trailing whitespace with the same Unicode-aware "\s" that was used for
        // matching; "rtrim()" works byte-wise and would leave e.g. U+3000 in front of the add-on.
        $limited = \preg_replace('/\\s++$/u', '', $matches[0]);
        if ($limited === null) {
            $limited = \rtrim($matches[0]);
        }

        return $limited . $str_add_on;
    }
13884
13885
    /**
     * Wraps a string to a given number of characters
     *
     * The native "wordwrap()" is run on a placeholder string ("?" per character, " " per
     * space, "#" per existing break) and the resulting break positions are then applied
     * to the real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if "$str"
     *                or "$break" is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        // "$chars" holds the real pieces, "$mask" one placeholder byte per piece
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_index => $segment) {
            // every segment after the first one was preceded by an existing break
            if ($segment_index > 0) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_pos = -1;
        $break_pos = -1;
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything in front of the current break position
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // an existing break or a space at the break position is replaced by the break below
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index]);
                ++$char_index;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed above belongs to the last line
        return $wrapped . \implode('', $chars);
    }
13979
13980
    /**
     * Splits the string by "$delimiter" (default: any newline), wraps every part on its own
     * via "UTF8::wordwrap()" and joins the parts again.
     *
     * The parts are joined with "$delimiter", or with "\n" if no delimiter was given.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        // a failed split leaves the list of lines empty
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $joined = \implode($delimiter ?? "\n", $wrapped_lines);

        return $joined . ($add_final_break ? $break : '');
    }
14033
14034
    /**
     * Returns the class-level list of Unicode White Space characters ("self::$WHITESPACE").
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
14046
14047
    /**
14048
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
14049
     *
14050
     * EXAMPLE: <code>
14051
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn']); // true
14052
     * //
14053
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
14054
     * </code>
14055
     *
14056
     * @see          http://hsivonen.iki.fi/php-utf8/
14057
     *
14058
     * @param string $str    <p>The string to be checked.</p>
14059
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
14060
     *
14061
     * @psalm-pure
14062
     *
14063
     * @return bool
14064
     *
14065
     * @noinspection ReturnTypeCanBeDeclaredInspection
14066
     */
14067 110
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        // The empty string is trivially valid.
        if ($str === '') {
            return true;
        }

        // Strict mode: binary-looking input that is detected as UTF-16 or
        // UTF-32 is rejected before any UTF-8 validation takes place.
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback: manual octet-by-octet validation.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $pending = 0; // number of continuation octets still expected
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // total octets announced by the current lead octet

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending !== 0) {
                // Inside a multi-octet sequence: only a continuation octet
                // (10xxxxxx) is acceptable here.
                if (($octet & 0xC0) !== 0x80) {
                    // Incomplete multi-octet sequence.
                    return false;
                }

                $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);
                --$pending;

                if ($pending !== 0) {
                    continue;
                }

                // End of the multi-octet sequence: $code_point now holds the
                // decoded value, so check for illegal sequences and code points.
                if (
                    // From Unicode 3.1, non-shortest form is illegal.
                    ($sequence_length === 2 && $code_point < 0x0080)
                    ||
                    ($sequence_length === 3 && $code_point < 0x0800)
                    ||
                    ($sequence_length === 4 && $code_point < 0x10000)
                    ||
                    // 5 and 6 octet sequences are never legal.
                    ($sequence_length > 4)
                    ||
                    // From Unicode 3.2, surrogate characters are illegal.
                    (($code_point & 0xFFFFF800) === 0xD800)
                    ||
                    // Code points outside the Unicode range are illegal.
                    ($code_point > 0x10FFFF)
                ) {
                    return false;
                }

                // Reset the decoder for the next character.
                $code_point = 0;
                $sequence_length = 1;

                continue;
            }

            // No sequence in progress: expect either a US-ASCII character or
            // the first octet of a multi-octet sequence.
            if (($octet & 0x80) === 0) {
                // US-ASCII, pass straight through.
                $sequence_length = 1;
            } elseif (($octet & 0xE0) === 0xC0) {
                // First octet of 2 octet sequence.
                $code_point = ($octet & 0x1F) << 6;
                $pending = 1;
                $sequence_length = 2;
            } elseif (($octet & 0xF0) === 0xE0) {
                // First octet of 3 octet sequence.
                $code_point = ($octet & 0x0F) << 12;
                $pending = 2;
                $sequence_length = 3;
            } elseif (($octet & 0xF8) === 0xF0) {
                // First octet of 4 octet sequence.
                $code_point = ($octet & 0x07) << 18;
                $pending = 3;
                $sequence_length = 4;
            } elseif (($octet & 0xFC) === 0xF8) {
                // First octet of 5 octet sequence. Always illegal, but rather
                // than trying to resynchronize we carry on until the end of
                // the sequence and let the checks above reject it.
                $code_point = ($octet & 0x03) << 24;
                $pending = 4;
                $sequence_length = 5;
            } elseif (($octet & 0xFE) === 0xFC) {
                // First octet of 6 octet sequence, handled like the 5 octet case.
                $code_point = ($octet & 1) << 30;
                $pending = 5;
                $sequence_length = 6;
            } else {
                // Current octet is neither in the US-ASCII range nor a legal
                // first octet of a multi-octet sequence.
                return false;
            }
        }

        // A sequence that is still open at the end of the input is invalid.
        return $pending === 0;
    }
14202
14203
    /**
14204
     * @param string $str
14205
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
14206
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
14207
     *
14208
     * @psalm-pure
14209
     *
14210
     * @return string
14211
     *
14212
     * @noinspection ReturnTypeCanBeDeclaredInspection
14213
     */
14214 33
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        // First pass: replace the "common" case-fold characters in the
        // requested direction (upper => lower, or lower => upper by default).
        $search = $use_lowercase ? self::$COMMON_CASE_FOLD['upper'] : self::$COMMON_CASE_FOLD['lower'];
        $replace = $use_lowercase ? self::$COMMON_CASE_FOLD['lower'] : self::$COMMON_CASE_FOLD['upper'];
        $str = \str_replace($search, $replace, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * Lazily loaded full case-folding table, kept for the rest of the request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Second pass: index 0 is the search side when lowercasing,
        // index 1 is the search side otherwise.
        $search_index = $use_lowercase ? 0 : 1;
        $replace_index = $use_lowercase ? 1 : 0;

        return \str_replace($FULL_CASE_FOLD[$search_index], $FULL_CASE_FOLD[$replace_index], $str);
    }
14256
14257
    /**
14258
     * get data from "/data/*.php"
14259
     *
14260
     * @param string $file
14261
     *
14262
     * @psalm-pure
14263
     *
14264
     * @return array
14265
     *
14266
     * @noinspection ReturnTypeCanBeDeclaredInspection
14267
     */
14268 6
    private static function getData(string $file)
    {
        // Data files live next to this class in "data/<name>.php"; the value
        // returned by the included file is handed back to the caller as-is.
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
14275
14276
    /**
14277
     * @psalm-pure
14278
     *
14279
     * @return true|null
14280
     */
14281 12
    private static function initEmojiData()
    {
        // The caches are built once; later calls report "nothing done" via null.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * Order the table by key byte-length, longest keys first.
         *
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // One reversible placeholder per key, derived from the key's CRC32.
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14311
14312
    /**
14313
     * Checks whether mbstring "overloaded" is active on the server.
14314
     *
14315
     * @psalm-pure
14316
     *
14317
     * @return bool
14318
     *
14319
     * @noinspection ReturnTypeCanBeDeclaredInspection
14320
     */
14321
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // Without the constant there is nothing to test the INI value against.
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        // Overloading of the string functions is active when the flag bit is set.
        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
14333
14334
    /**
14335
     * @param array    $strings
14336
     * @param bool     $remove_empty_values
14337
     * @param int|null $remove_short_values
14338
     *
14339
     * @psalm-pure
14340
     *
14341
     * @return array
14342
     *
14343
     * @noinspection ReturnTypeCanBeDeclaredInspection
14344
     */
14345 2
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // Filters a list of strings; the surviving values are re-indexed from 0.
        $return = [];

        // Iterate by value: nothing is written back into $strings, so a
        // by-reference loop would only leave a dangling reference behind.
        foreach ($strings as $str) {
            // Drop values whose character length is at or below the threshold.
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            // Drop values that are empty once surrounding whitespace is trimmed.
            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14375
14376
    /**
14377
     * rxClass
14378
     *
14379
     * @param string $s
14380
     * @param string $class
14381
     *
14382
     * @psalm-pure
14383
     *
14384
     * @return string
14385
     *
14386
     * @noinspection ReturnTypeCanBeDeclaredInspection
14387
     */
14388 33
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * Results are memoized per ($s, $class) pair.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // Element 0 collects the content of one character class "[...]";
        // any further elements become alternatives next to that class.
        /** @var string[] $class_array */
        $class_array = [$class];

        // Iterate by value over a dedicated variable: the pieces are never
        // modified, and the $s parameter must not be overwritten by the loop.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // Put a literal "-" at the very start of the class.
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // Pieces of at most two bytes are escaped for use in a regex.
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // A single character of three or more bytes is appended unescaped.
                $class_array[0] .= $char;
            } else {
                // Longer pieces cannot live inside "[...]"; keep them as alternatives.
                $class_array[] = $char;
            }
        }

        // Compare against '' explicitly: a plain truthiness test would treat
        // the class content "0" as empty and skip the brackets.
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14434
14435
    /**
14436
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
14437
     *
14438
     * @param string $names
14439
     * @param string $delimiter
14440
     * @param string $encoding
14441
     *
14442
     * @psalm-pure
14443
     *
14444
     * @return string
14445
     *
14446
     * @noinspection ReturnTypeCanBeDeclaredInspection
14447
     */
14448 1
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // Split the input into its parts; each part is handled on its own below.
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            // Parts that are left untouched when they match exactly.
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            // Parts starting with one of these are left untouched as well.
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // $name is taken by reference because capitalized parts are written
        // back into $name_helper_array.
        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $continue = false;

            // When splitting on "-", a part that merely starts with one of the
            // special names is skipped too.
            if ($delimiter === '-') {
                foreach ($special_cases['names'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            foreach ($special_cases['prefixes'] as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $continue = true;

                    break;
                }
            }

            if ($continue) {
                continue;
            }

            // NOTE(review): $encoding is not forwarded to ucfirst() here -
            // confirm whether that is intentional.
            $name = self::ucfirst($name);
        }
        // Break the reference so later use of $name cannot alter the last element.
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
14535
14536
    /**
14537
     * Generic case-sensitive transformation for collation matching.
14538
     *
14539
     * @param string $str <p>The input string</p>
14540
     *
14541
     * @psalm-pure
14542
     *
14543
     * @return string|null
14544
     */
14545 6
    private static function strtonatfold(string $str)
    {
        // Decompose first (NFD) so that combining marks become separate code points.
        /** @noinspection PhpUndefinedClassInspection */
        $normalized = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() returns false when it cannot process the input. Passing
        // that on to preg_replace() raises a TypeError under strict_types on
        // PHP 8, so fall back to the empty string, which is what the implicit
        // conversion produced before.
        if ($normalized === false) {
            $normalized = '';
        }

        // Strip all non-spacing marks (\p{Mn}); preg_replace() yields null on error.
        return \preg_replace(
            '/\p{Mn}+/u',
            '',
            $normalized
        );
    }
14554
14555
    /**
14556
     * @param int|string $input
14557
     *
14558
     * @psalm-pure
14559
     *
14560
     * @return string
14561
     *
14562
     * @noinspection ReturnTypeCanBeDeclaredInspection
14563
     * @noinspection SuspiciousBinaryOperationInspection
14564
     */
14565 32
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // The lookup tables are loaded lazily on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Build a two-byte sequence from the single input byte. The
            // operands below are strings on purpose: "|" and "&" act on the
            // bytes of the strings, they are not boolean operators here.
            //
            // The table index uses an integer shift instead of "/ 64": the
            // division yields a float, and a fractional float used as an
            // array key triggers a deprecation notice on PHP 8.1+.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
14594
14595
    /**
14596
     * @param string $str
14597
     *
14598
     * @psalm-pure
14599
     *
14600
     * @return string
14601
     *
14602
     * @noinspection ReturnTypeCanBeDeclaredInspection
14603
     */
14604 10
    private static function urldecode_unicode_helper(string $str)
    {
        // Matches "%uXXX" / "%uXXXX" escapes (3-4 hex digits).
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        // Nothing to rewrite: hand the input back untouched.
        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        // Turn every escape into the hexadecimal HTML entity "&#xXXXX;".
        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
14613
}
14614