Passed
Push — master ( ea880f...362423 )
by Lars
08:28
created

UTF8::str_starts_with()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 3.0416

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 3
eloc 5
c 2
b 0
f 0
nc 3
nop 2
dl 0
loc 11
ccs 5
cts 6
cp 0.8333
crap 3.0416
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
     * Byte Order Mark => length of that mark in bytes.
     *
     * Each BOM is listed twice: once as its raw byte sequence and once in the
     * form it takes after having been read as "WINDOWS-1252" (in that form a
     * single character may occupy more than one byte, hence the larger lengths).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
        'ï»¿'              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @psalm-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @psalm-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @psalm-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @psalm-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @psalm-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @psalm-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @psalm-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @psalm-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @psalm-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @psalm-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @psalm-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @psalm-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
     * Constructor without parameters; it performs no initialisation work.
     */
    public function __construct()
    {
        // intentionally empty
    }
252
253
    /**
     * Fetch the single character found at a given position of a string:
     * works like $str[1], but counts characters instead of bytes.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>The string to read from.</p>
     * @param int    $pos      <p>Zero-based position of the wanted character.</p>
     * @param string $encoding [optional] <p>Charset; 'UTF-8' is served directly by mb_substr(),
     *                         anything else is delegated to UTF8::substr().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $pos, or an empty string if $str is empty
     *                or $pos is negative.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $has_valid_input = $str !== '' && $pos >= 0;
        if (!$has_valid_input) {
            return '';
        }

        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
279
280
    /**
     * Returns the string with the UTF-8 BOM placed in front of it.
     *
     * INFO: A string for which UTF8::string_has_bom() already reports a BOM is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input string, guaranteed to carry a BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
302
303
    /**
     * Returns a copy of the array whose keys have been lower- or upper-cased.
     *
     * Every key is cast to string and converted via UTF8::strtolower() or
     * UTF8::strtoupper(); the values are copied over unchanged. If two keys
     * become identical after the conversion, the later entry overwrites the
     * earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other
     *                                       value is treated like CASE_LOWER.</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<array-key, mixed>
     *                  <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // only an explicit CASE_UPPER upper-cases; everything else falls back to lower-casing
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: the elements are only read, so no reference is needed
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Extracts the text located between the first $start delimiter (searched
     * from $offset) and the next $end delimiter that follows it.
     *
     * An empty string is returned when either delimiter cannot be found or
     * when $end directly follows $start (nothing in between).
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Charset; 'UTF-8' uses the "mb_" functions directly,
     *                         any other value is normalized and handled by the UTF8 helpers.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $start_at = self::strpos($str, $start, $offset, $encoding);
            if ($start_at === false) {
                return '';
            }

            // first position after the start delimiter
            $content_at = $start_at + (int) self::strlen($start, $encoding);
            $end_at = self::strpos($str, $end, $content_at, $encoding);
            if ($end_at === false || $end_at === $content_at) {
                return '';
            }

            return (string) self::substr($str, $content_at, $end_at - $content_at, $encoding);
        }

        $start_at = \mb_strpos($str, $start, $offset);
        if ($start_at === false) {
            return '';
        }

        // first position after the start delimiter
        $content_at = $start_at + (int) \mb_strlen($start);
        $end_at = \mb_strpos($str, $end, $content_at);
        if ($end_at === false || $end_at === $content_at) {
            return '';
        }

        return (string) \mb_substr($str, $content_at, $end_at - $content_at);
    }
406
407
    /**
     * Convert binary into a string.
     *
     * The input is read as one big-endian binary number: characters other than
     * "0" and "1" are ignored, leading zero bits are dropped, and the remaining
     * bits are left-padded to whole bytes. Each group of 8 bits then becomes
     * one byte of the result.
     *
     * Working on 8-bit groups (instead of converting the whole number through
     * base_convert() + pack('H*')) keeps long inputs exact and prevents a
     * value with an odd number of hex digits from being padded on the wrong
     * side (e.g. '1010' must yield "\n", not "\xa0").
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes; an empty string for empty, non-string or all-zero input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep only binary digits, then drop the leading zero bits
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad so that the bit string consists of complete bytes
        $remainder = \strlen($bits) % 8;
        if ($remainder !== 0) {
            $bits = \str_repeat('0', 8 - $remainder) . $bits;
        }

        $bytes = '';
        foreach (\str_split($bits, 8) as $byte_bits) {
            $bytes .= \chr((int) \bindec($byte_bits));
        }

        return $bytes;
    }
433
434
    /**
     * Provides the byte sequence used as UTF-8 Byte Order Mark.
     *
     * INFO: the UTF-16 and UTF-32 marks are listed in UTF8::$BOM
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The three bytes EF BB BF.</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
450
451
    /**
     * Alias of UTF8::chr_map(): both arguments are passed through unchanged
     * and the result of chr_map() is returned as is.
     *
     * @param callable $callback <p>The callback handed to chr_map().</p>
     * @param string   $str      <p>The string handed to chr_map().</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
467
468
    /**
     * Picks one character out of a string by its index (the first character has index 0).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character; it is passed on unchecked as the
     *                         start argument of the substring call.</p>
     * @param string $encoding [optional] <p>Default is UTF-8, which is served by mb_substr();
     *                         other values are delegated to UTF8::substr().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
488
489
    /**
     * Breaks a string up into its characters by delegating to UTF8::str_split()
     * with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
504
505
    /**
     * Detects which UTF-8 related capabilities the server offers and stores
     * the findings in self::$SUPPORT.
     *
     * The detection runs only once: the first call fills self::$SUPPORT, every
     * later call returns immediately.
     *
     * @return true|null
     *                   <p>true if the detection ran during this call, null if it had
     *                   already been done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            // detection has already been performed
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // mbstring: http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch the "mb_" functions over to UTF-8
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // iconv: http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // intl: http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // IntlChar: http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // ctype: http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // finfo: http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // json: http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // pcre: http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
560
561
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * Results are memoized per "code point + encoding" in a function-static
     * cache. Code points up to U+007F are taken from the single-byte table
     * self::$CHR; everything above is built by IntlChar (if available) or by
     * composing the UTF-8 bytes manually. For an $encoding other than UTF-8
     * the character is converted via UTF8::encode() afterwards.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (code point <= 0 or above U+10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // nothing to generate for non-positive values or values beyond the last Unicode code point
        if ($code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only U+0001 - U+007F are encoded as a single byte in UTF-8. U+0080 must
        // not take this shortcut: the table would yield the lone byte "\x80",
        // whereas the correct encoding is "\xC2\x80".
        if ($code_point <= 0x7F) { // only for "simple"-chars

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $code_point = (int) $code_point;
        if ($code_point <= 0x7FF) {
            // two bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
686
687
    /**
     * Runs a callback on every character of a string and collects the results.
     *
     * The string is split with UTF8::str_split() and the pieces are handed to
     * array_map() together with the callback.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
707
708
    /**
     * Lists, for every character of a Unicode string, how many bytes it occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character; empty for an empty string.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byte_counter = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_counter = '\strlen';
        }

        return \array_map($byte_counter, self::str_split($str));
    }
743
744
    /**
     * Get a decimal code representation of a specific character.
     *
     * With iconv available the character is converted to UCS-4LE and unpacked
     * as a 32-bit integer. Otherwise (or if iconv fails) the code point is
     * decoded by hand from the lead byte and its continuation bytes.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point; 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // an empty string has no bytes to decode; both code paths below would fail on it
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells the sequence length; strip its marker bits
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the 6 payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
796
797
    /**
     * Formats the code point of a character as prefixed hexadecimal value (U+xxxx).
     *
     * The code point is obtained from UTF8::ord() and formatted by
     * UTF8::int_to_hex(). The literal string '&#0;' is treated like an empty
     * character before it is passed to UTF8::ord().
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Handed to UTF8::int_to_hex(), default 'U+'.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty $char.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $subject = $char === '&#0;' ? '' : $char;

        return self::int_to_hex(self::ord((string) $subject), $prefix);
    }
822
823
    /**
     * Deprecated alias: forwards the argument to UTF8::chr_to_decimal() and
     * returns its result.
     *
     * @param string $chr <p>The character handed to chr_to_decimal().</p>
     *
     * @psalm-pure
     *
     * @return int
     *
     * @see        UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
839
840
    /**
     * Cuts a string into chunks of a given character length and glues the
     * chunks together with a line ending.
     *
     * The chunks come from UTF8::str_split() and are joined with implode(), so
     * $end is placed between the chunks only and not after the last one.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
858
859
    /**
     * Strips every byte that is not part of a well-formed UTF-8 sequence from
     * a string and optionally applies further clean-up steps.
     *
     * Order of processing: invalid bytes are dropped first, then (each only if
     * enabled) diamond question marks are replaced, invisible characters are
     * removed, whitespace is normalized, MS Word characters are normalized and
     * finally the BOM is removed.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces;
     *                                                        only used together with $normalize_whitespace.</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Passed as second argument to
     *                                                        UTF8::remove_invisible_characters(); presumably true
     *                                                        also removes url encoded invisible characters e.g.:
     *                                                        "%0B" (to be confirmed against that method).<br>
     *                                                        WARNING: maybe contains false-positives e.g.
     *                                                        aa%0Baa -> aaaa.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; groups 2 and 3 match
        // stray bytes. Replacing every match with "$1" keeps only group 1.
        $utf8_filter_rx = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_filter_rx, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        return $remove_bom ? self::remove_bom($cleaned) : $cleaned;
    }
941
942
    /**
943
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
944
     *
945
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
946
     *
947
     * @param string $str <p>The input string.</p>
948
     *
949
     * @psalm-pure
950
     *
951
     * @return string
952
     */
953 33
    public static function cleanup($str): string
    {
        // Accept anything castable to string; an empty result needs no work.
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // Repair ISO <-> UTF-8 mix-ups first.
        $fixed = self::fix_simple_utf8($input);

        // Positional flags handed to clean(), named here for readability.
        // The invisible-character removal stays at clean()'s default (enabled).
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
979
980
    /**
981
     * Accepts a string or a array of strings and returns an array of Unicode code points.
982
     *
983
     * INFO: opposite to UTF8::string()
984
     *
985
     * EXAMPLE: <code>
986
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
987
     * // ... OR ...
988
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
989
     * </code>
990
     *
991
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
992
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
993
     *                                     default, code points will be returned as integers.</p>
994
     *
995
     * @psalm-pure
996
     *
997
     * @return int[]|string[]
998
     *                        <p>
999
     *                        The array of code points:<br>
1000
     *                        int[] for $use_u_style === false<br>
1001
     *                        string[] for $use_u_style === true<br>
1002
     *                        </p>
1003
     */
1004 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is first split into its characters; arrays are used as given.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * Anything that is neither a string nor an array, and an empty list,
         * both produce an empty result.
         *
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        // Map every character to its code point (array keys are preserved).
        $code_points = \array_map(
            static function ($char) {
                return self::ord($char);
            },
            $chars
        );

        if (!$use_u_style) {
            return $code_points;
        }

        // Optional second pass: convert each code point via int_to_hex().
        return \array_map(
            static function ($code_point) {
                return self::int_to_hex($code_point);
            },
            $code_points
        );
    }
1041
1042
    /**
1043
     * Trims the string and replaces consecutive whitespace characters with a
1044
     * single space. This includes tabs and newline characters, as well as
1045
     * multibyte whitespace such as the thin space and ideographic space.
1046
     *
1047
     * @param string $str <p>The input string.</p>
1048
     *
1049
     * @psalm-pure
1050
     *
1051
     * @return string
1052
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1053
     */
1054 13
    public static function collapse_whitespace(string $str): string
    {
        // One or more characters of the POSIX "space" class.
        $whitespace_run = '[[:space:]]+';

        // Without mbstring, go through the class' own regex_replace() helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, $whitespace_run, ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace($whitespace_run, ' ', $str);

        // Every run is a single space by now, so trim() removes leading/trailing ones.
        return \trim($collapsed);
    }
1063
1064
    /**
1065
     * Returns count of characters used in a string.
1066
     *
1067
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1068
     *
1069
     * @param string $str                     <p>The input string.</p>
1070
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1071
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
1072
     *
1073
     * @psalm-pure
1074
     *
1075
     * @return int[]
1076
     *               <p>An associative array of Character as keys and
1077
     *               their count as values.</p>
1078
     */
1079 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into chunks of length 1, then tally how often each one occurs.
        $single_chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($single_chars);
    }
1093
1094
    /**
1095
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1096
     *
1097
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1098
     *
1099
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1100
     *
1101
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1102
     * @param string[] $filter
1103
     * @param bool     $strip_tags
1104
     * @param bool     $strtolower
1105
     *
1106
     * @psalm-pure
1107
     *
1108
     * @return string
1109
     *
1110
     * @psalm-param array<string,string> $filter
1111
     */
1112 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated unique identifier,
        // everything else is passed through clean() first.
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // str_replace() is used instead of the much slower strtr(). To keep
        // '__' intact while the filter runs, it is swapped for the placeholder
        // '##' first - unless the caller defined '__' as a filter key.
        $placeholder_count = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $placeholder_count);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Restore '__' only when the placeholder was actually inserted above.
        if ($placeholder_count > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Characters kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - U+00A1 up to U+FFFF (code points above U+FFFF are stripped as well)
        // Everything else is removed.
        $identifier = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $identifier = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $identifier);

        return \trim($identifier, '-');
    }
1169
1170
    /**
1171
     * Remove css media-queries.
1172
     *
1173
     * @param string $str
1174
     *
1175
     * @psalm-pure
1176
     *
1177
     * @return string
1178
     */
1179 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Modifiers: i = case-insensitive, s = dot also matches newlines,
        // u = UTF-8 mode, m = multi-line, U = quantifiers are ungreedy by default.
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        // preg_replace() yields null on a regex error; the cast turns that into ''.
        return (string) $without_media_queries;
    }
1187
1188
    /**
1189
     * Checks whether ctype is available on the server.
1190
     *
1191
     * @psalm-pure
1192
     *
1193
     * @return bool
1194
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
1195
     *
1196
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
1197
     */
1198
    public static function ctype_loaded(): bool
    {
        // Ask the runtime whether the "ctype" extension is loaded.
        $ctype_available = \extension_loaded('ctype');

        return $ctype_available;
    }
1202
1203
    /**
1204
     * Converts an int value into a UTF-8 character.
1205
     *
1206
     * INFO: opposite to UTF8::string()
1207
     *
1208
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1209
     *
1210
     * @param int|string $int
1211
     *
1212
     * @psalm-param int|numeric-string $int
1213
     *
1214
     * @psalm-pure
1215
     *
1216
     * @return string
1217
     */
1218 20
    public static function decimal_to_chr($int): string
    {
        // Build the numeric character reference ("&#<decimal>;") and let the
        // class' html_entity_decode() turn it into the character.
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1222
1223
    /**
1224
     * Decodes a MIME header field
1225
     *
1226
     * @param string $str
1227
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1228
     *
1229
     * @psalm-pure
1230
     *
1231
     * @return false|string
1232
     *                      <p>A decoded MIME field on success,
1233
     *                      or false if an error occurs during the decoding.</p>
1234
     */
1235 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // 'UTF-8' and 'CP850' are passed through as-is; any other name is
        // normalized first (with 'UTF-8' as the fallback).
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // iconv_mime_decode() is called unconditionally (original note:
        // "always fallback via symfony polyfill"); malformed parts do not abort decoding.
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1244
1245
    /**
1246
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1247
     *
1248
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1249
     *
1250
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1251
     *
1252
     * @return string
1253
     *                <p>Emoji or empty string on error.</p>
1254
     */
1255 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // A two-letter country code consists of exactly two ASCII letters.
        // Reject everything else (empty string, wrong length, digits,
        // punctuation, multibyte characters) up front: the code below indexes
        // the string byte-wise and would otherwise map such input to code
        // points outside the regional-indicator range instead of returning ''.
        // The "D" modifier stops "$" from matching before a trailing newline.
        if (\preg_match('/^[A-Za-z]{2}$/D', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // Each letter A-Z is shifted by the same distance onto
        // U+1F1E6 ... U+1F1FF; the two resulting symbols form the flag.
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41; // "A"

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1273
1274
    /**
1275
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1276
     *
1277
     * INFO: opposite to UTF8::emoji_encode()
1278
     *
1279
     * EXAMPLE: <code>
1280
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1281
     * //
1282
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1283
     * </code>
1284
     *
1285
     * @param string $str                            <p>The input string.</p>
1286
     * @param bool   $use_reversible_string_mappings [optional] <p>
1287
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1288
     *                                               between "emoji_encode" and "emoji_decode".</p>
1289
     *
1290
     * @psalm-pure
1291
     *
1292
     * @return string
1293
     */
1294 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Make sure the emoji lookup caches are populated.
        self::initEmojiData();

        // Pick the set of encoded keys that matches the requested mapping.
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Replace every encoded key with the emoji stored at the same index.
        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1314
1315
    /**
1316
     * Encode a string with emoji chars into a non-emoji string.
1317
     *
1318
     * INFO: opposite to UTF8::emoji_decode()
1319
     *
1320
     * EXAMPLE: <code>
1321
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
1322
     * //
1323
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1324
     * </code>
1325
     *
1326
     * @param string $str                            <p>The input string</p>
1327
     * @param bool   $use_reversible_string_mappings [optional] <p>
1328
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1329
     *                                               between "emoji_encode" and "emoji_decode"</p>
1330
     *
1331
     * @psalm-pure
1332
     *
1333
     * @return string
1334
     */
1335 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Make sure the emoji lookup caches are populated.
        self::initEmojiData();

        // Pick the set of replacement keys that matches the requested mapping.
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Replace every emoji with the key stored at the same index.
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $encoded_keys,
            $str
        );
    }
1355
1356
    /**
1357
     * Encode a string with a new charset-encoding.
1358
     *
1359
     * INFO:  This function will also try to fix broken / double encoding,
1360
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1361
     *
1362
     * EXAMPLE: <code>
1363
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1364
     * //
1365
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1366
     * //
1367
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1368
     * //
1369
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1370
     * </code>
1371
     *
1372
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1373
     * @param string $str                           <p>The input string</p>
1374
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1375
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1376
     *                                              string-encoding</p>
1377
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1378
     *                                              A empty string will trigger the autodetect anyway.</p>
1379
     *
1380
     * @psalm-pure
1381
     *
1382
     * @return string
1383
     *
1384
     * @psalm-suppress InvalidReturnStatement
1385
     */
1386 29
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target given: hand the input back untouched.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are used as-is; every other name is normalized.
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target are known and identical: no conversion needed.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // Pseudo-encodings. For each one the "to" case returns immediately,
        // while the "from" case decodes the input and clears $from_encoding.
        // The order of these checks matters and is kept as-is.
        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // Detect the source encoding when asked to, or when none is known.
        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // Dedicated converters for the common single-byte <-> UTF-8 cases.
        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // Warn (but carry on) when mbstring is missing and the target is not
        // one of the three encodings named below.
        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $mb_converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // A falsy result falls through to iconv() below.
            if ($mb_converted) {
                \assert(\is_string($mb_converted));

                return $mb_converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $iconv_converted = @\iconv($from_encoding, $to_encoding, $str);

        // Last resort: return the (possibly pre-decoded) input unchanged.
        return $iconv_converted !== false ? $iconv_converted : $str;
    }
1534
1535
    /**
1536
     * @param string $str
1537
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1538
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1539
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1540
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1541
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1542
     *
1543
     * @psalm-pure
1544
     *
1545
     * @return false|string
1546
     *                      <p>An encoded MIME field on success,
1547
     *                      or false if an error occurs during the encoding.</p>
1548
     */
1549 1
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are passed through as-is; any other charset name
        // is normalized first (with 'UTF-8' as the fallback).
        $from_is_known = $from_charset === 'UTF-8' || $from_charset === 'CP850';
        if (!$from_is_known) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        $to_is_known = $to_charset === 'UTF-8' || $to_charset === 'CP850';
        if (!$to_is_known) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        // Option array in the shape iconv_mime_encode() expects.
        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // The field name is left empty; only the value is supplied.
        // (original note: "always fallback via symfony polyfill")
        return \iconv_mime_encode('', $str, $preferences);
    }
1578
1579
    /**
1580
     * Create an extract from a sentence; if the search string is found, we try to center the extract around it.
1581
     *
1582
     * @param string   $str                       <p>The input string.</p>
1583
     * @param string   $search                    <p>The searched string.</p>
1584
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1585
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1586
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1587
     *
1588
     * @psalm-pure
1589
     *
1590
     * @return string
1591
     */
1592 1
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // For UTF-8 the native mb_* functions are called directly; every other
        // encoding goes through the class' own encoding-aware wrappers.
        $is_utf8 = $encoding === 'UTF-8';

        // Characters stripped from the cut edges of the extract.
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Default length: half of the input length.
        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // NOTE on the min()/max() calls below: mb_strpos()/mb_strrpos() return
        // false for a missing needle (the self::* wrappers presumably do too -
        // TODO confirm). min() then yields false, which the (int) cast turns
        // into 0, so a missing " " or "." means "no break position found".

        // ---- No search term: cut at the first " " / "." from $length - 1 on ----
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $string_length);
            }

            $pos = $is_utf8
                ? (int) \min(\mb_strpos($str, ' ', $end), \mb_strpos($str, '.', $end))
                : (int) \min(self::strpos($str, ' ', $end, $encoding), self::strpos($str, '.', $end, $encoding));

            if (!$pos) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos)
                : self::substr($str, 0, $pos, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---- Search term given: locate it (case-insensitively) ----
        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // Start of the extract: the last " " / "." inside the first
        // $half_side characters (0 when there is none).
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $is_utf8
                ? \mb_substr($str, 0, $half_side)
                : self::substr($str, 0, $half_side, $encoding);

            if ($half_text !== false) {
                $pos_start = $is_utf8
                    ? (int) \max(\mb_strrpos($half_text, ' '), \mb_strrpos($half_text, '.'))
                    : (int) \max(self::strrpos($half_text, ' ', 0, $encoding), self::strrpos($half_text, '.', 0, $encoding));
            }
        }

        // ---- Match lies far enough into the text: the extract starts mid-text ----
        if ($word_position && $half_side > 0) {
            $offset = $pos_start + $length - 1;
            $real_length = (int) self::strlen($str, $encoding);
            if ($offset > $real_length) {
                $offset = $real_length;
            }

            $next_break = $is_utf8
                ? (int) \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
                : (int) \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding));
            $pos_end = $next_break - $pos_start;

            if ($pos_end <= 0) {
                // No usable end position: take everything from $pos_start on.
                $str_sub = $is_utf8
                    ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                    : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

                if ($str_sub === false) {
                    return '';
                }

                return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
            }

            // Text is skipped on both sides of the extract.
            $str_sub = $is_utf8
                ? \mb_substr($str, $pos_start, $pos_end)
                : self::substr($str, $pos_start, $pos_end, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---- Otherwise the extract starts at the beginning of the text ----
        $offset = $length - 1;
        $true_length = (int) self::strlen($str, $encoding);
        if ($offset > $true_length) {
            $offset = $true_length;
        }

        $pos_end = $is_utf8
            ? (int) \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
            : (int) \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding));

        if (!$pos_end) {
            return $str;
        }

        $str_sub = $is_utf8
            ? \mb_substr($str, 0, $pos_end)
            : self::substr($str, 0, $pos_end, $encoding);

        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }
1768
1769
    /**
1770
     * Reads entire file into a string.
1771
     *
1772
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
1773
     *
1774
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1775
     *
1776
     * @see http://php.net/manual/en/function.file-get-contents.php
1777
     *
1778
     * @param string        $filename         <p>
1779
     *                                        Name of the file to read.
1780
     *                                        </p>
1781
     * @param bool          $use_include_path [optional] <p>
1782
     *                                        Prior to PHP 5, this parameter is called
1783
     *                                        use_include_path and is a bool.
1784
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1785
     *                                        to trigger include path
1786
     *                                        search.
1787
     *                                        </p>
1788
     * @param resource|null $context          [optional] <p>
1789
     *                                        A valid context resource created with
1790
     *                                        stream_context_create. If you don't need to use a
1791
     *                                        custom context, you can skip this parameter by &null;.
1792
     *                                        </p>
1793
     * @param int|null      $offset           [optional] <p>
1794
     *                                        The offset where the reading starts.
1795
     *                                        </p>
1796
     * @param int|null      $max_length       [optional] <p>
1797
     *                                        Maximum length of data read. The default is to read until end
1798
     *                                        of file is reached.
1799
     *                                        </p>
1800
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1801
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1802
     *                                        some files, because they used non default utf-8 chars. Binary files
1803
     *                                        like images or pdf will not be converted.</p>
1804
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1805
     *                                        An empty string will trigger the autodetect anyway.</p>
1806
     *
1807
     * @psalm-pure
1808
     *
1809
     * @return false|string
1810
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1811
     *
1812
     * @noinspection PhpTooManyParametersInspection
1813
     */
1814 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the file name by hand instead of calling
        // filter_var(..., FILTER_SANITIZE_STRING): that filter is deprecated as
        // of PHP 8.1 and emits a deprecation notice on every call. The
        // replacement mirrors what the filter did without flags: NUL bytes and
        // tag-like "<...>" sequences are removed, then single and double quotes
        // are turned into their numeric HTML entities.
        $filename_without_tags = \preg_replace('/\x00|<[^>]*>?/', '', $filename);
        if ($filename_without_tags === null) {
            // preg_replace() reported an error: treat it as a failed sanitization.
            return false;
        }
        $filename = \str_replace(["'", '"'], ['&#39;', '&#34;'], $filename_without_tags);

        // A default stream context (carrying the HTTP timeout) is only created
        // when the caller passed no context and the timeout is non-zero.
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // The length argument is only forwarded when it was actually given,
        // so that "no limit" is expressed by omitting it.
        if (\is_int($max_length)) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Data flagged by is_binary() is left untouched, unless is_utf16() or
        // is_utf32() report something other than false for it; everything else
        // is re-encoded to UTF-8 and cleaned up.
        if (
            !self::is_binary($data, true)
            ||
            self::is_utf16($data, false) !== false
            ||
            self::is_utf32($data, false) !== false
        ) {
            $data = self::encode('UTF-8', $data, false, $from_encoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1871
1872
    /**
1873
     * Checks if a file starts with BOM (Byte Order Mark) character.
1874
     *
1875
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
1876
     *
1877
     * @param string $file_path <p>Path to a valid file.</p>
1878
     *
1879
     * @throws \RuntimeException if file_get_contents() returned false
1880
     *
1881
     * @return bool
1882
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1883
     *
1884
     * @psalm-pure
1885
     */
1886 2
    public static function file_has_bom(string $file_path): bool
    {
        // Load the complete file; the BOM check itself works on the string.
        $raw_bytes = \file_get_contents($file_path);

        if ($raw_bytes !== false) {
            return self::string_has_bom($raw_bytes);
        }

        // Reading failed: surface it as an exception instead of a boolean.
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1895
1896
    /**
1897
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1898
     *
1899
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
1900
     *
1901
     * @param array|object|string $var
1902
     * @param int                 $normalization_form
1903
     * @param string              $leading_combining
1904
     *
1905
     * @psalm-pure
1906
     *
1907
     * @return mixed
1908
     *
1909
     * @template TFilter
1910
     * @psalm-param TFilter $var
1911
     * @psalm-return TFilter
1912
     */
1913 65
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type_of_var = \gettype($var);

        if ($type_of_var === 'object' || $type_of_var === 'array') {
            // Containers: every iterated entry is filtered recursively and
            // written back in place.
            foreach ($var as &$entry) {
                $entry = self::filter($entry, $normalization_form, $leading_combining);
            }
            // Break the reference so that later writes cannot hit the last entry.
            unset($entry);
        } elseif ($type_of_var === 'string') {
            // Line endings are only normalized when a carriage return is present.
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // Strings that pass the ASCII check need no further work.
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // Non-empty marker: the string already is in the requested form.
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // No first byte means normalize() produced nothing usable,
                    // so the string is run through encode() instead.
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                // Only a string that starts with a non-ASCII byte and went
                // through a successful normalization is checked for a leading
                // combining mark (and only if a prefix is configured at all).
                if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0])) {
                    if (\preg_match('/^\\p{Mn}/u', $var)) {
                        // Prevent leading combining chars
                        // for NFC-safe concatenations.
                        $var = $leading_combining . $var;
                    }
                }
            }
        }
        // Every other type is handed back unchanged.

        /** @noinspection PhpSillyAssignmentInspection */
        /** @psalm-var TFilter $var */
        $var = $var;

        return $var;
    }
1971
1972
    /**
1973
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1974
     *
1975
     * Gets a specific external variable by name and optionally filters it.
1976
     *
1977
     * EXAMPLE: <code>
1978
     * // _GET['foo'] = 'bar';
1979
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
1980
     * </code>
1981
     *
1982
     * @see http://php.net/manual/en/function.filter-input.php
1983
     *
1984
     * @param int            $type          <p>
1985
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1986
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1987
     *                                      <b>INPUT_ENV</b>.
1988
     *                                      </p>
1989
     * @param string         $variable_name <p>
1990
     *                                      Name of a variable to get.
1991
     *                                      </p>
1992
     * @param int            $filter        [optional] <p>
1993
     *                                      The ID of the filter to apply. The
1994
     *                                      manual page lists the available filters.
1995
     *                                      </p>
1996
     * @param int|int[]|null $options       [optional] <p>
1997
     *                                      Associative array of options or bitwise disjunction of flags. If filter
1998
     *                                      accepts options, flags can be provided in "flags" field of array.
1999
     *                                      </p>
2000
     *
2001
     * @psalm-pure
2002
     *
2003
     * @return mixed
2004
     *               <p>
2005
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
2006
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
2007
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
2008
     *               </p>
2009
     */
2010 1
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // An omitted $options argument always arrives as null, so the null
        // check alone decides whether the native default for it is used.
        $raw_value = $options === null
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        // Whatever the native filter produced is passed through self::filter().
        return self::filter($raw_value);
    }
2027
2028
    /**
2029
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2030
     *
2031
     * Gets external variables and optionally filters them.
2032
     *
2033
     * EXAMPLE: <code>
2034
     * // _GET['foo'] = 'bar';
2035
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
2036
     * </code>
2037
     *
2038
     * @see http://php.net/manual/en/function.filter-input-array.php
2039
     *
2040
     * @param int        $type       <p>
2041
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
2042
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
2043
     *                               <b>INPUT_ENV</b>.
2044
     *                               </p>
2045
     * @param array|null $definition [optional] <p>
2046
     *                               An array defining the arguments. A valid key is a string
2047
     *                               containing a variable name and a valid value is either a filter type, or an array
2048
     *                               optionally specifying the filter, flags and options. If the value is an
2049
     *                               array, valid keys are filter which specifies the
2050
     *                               filter type,
2051
     *                               flags which specifies any flags that apply to the
2052
     *                               filter, and options which specifies any options that
2053
     *                               apply to the filter. See the example below for a better understanding.
2054
     *                               </p>
2055
     *                               <p>
2056
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
2057
     *                               input array are filtered by this filter.
2058
     *                               </p>
2059
     * @param bool       $add_empty  [optional] <p>
2060
     *                               Add missing keys as <b>NULL</b> to the return value.
2061
     *                               </p>
2062
     *
2063
     * @psalm-pure
2064
     *
2065
     * @return mixed
2066
     *               <p>
2067
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2068
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2069
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
2070
     *               is not set and <b>NULL</b> if the filter fails.
2071
     *               </p>
2072
     */
2073 1
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // A missing $definition always arrives as null, so the null check alone
        // selects the native call that works without a definition.
        if ($definition !== null) {
            $filtered_input = \filter_input_array($type, $definition, $add_empty);
        } else {
            $filtered_input = \filter_input_array($type);
        }

        // The native result is passed through self::filter() before returning.
        return self::filter($filtered_input);
    }
2089
2090
    /**
2091
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2092
     *
2093
     * Filters a variable with a specified filter.
2094
     *
2095
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
2096
     *
2097
     * @see http://php.net/manual/en/function.filter-var.php
2098
     *
2099
     * @param float|int|string|null $variable <p>
2100
     *                                        Value to filter.
2101
     *                                        </p>
2102
     * @param int                   $filter   [optional] <p>
2103
     *                                        The ID of the filter to apply. The
2104
     *                                        manual page lists the available filters.
2105
     *                                        </p>
2106
     * @param int|int[]|null        $options  [optional] <p>
2107
     *                                        Associative array of options or bitwise disjunction of flags. If filter
2108
     *                                        accepts options, flags can be provided in "flags" field of array. For
2109
     *                                        the "callback" filter, callable type should be passed. The
2110
     *                                        callback must accept one argument, the value to be filtered, and return
2111
     *                                        the value after filtering/sanitizing it.
2112
     *                                        </p>
2113
     *                                        <p>
2114
     *                                        <code>
2115
     *                                        // for filters that accept options, use this format
2116
     *                                        $options = array(
2117
     *                                        'options' => array(
2118
     *                                        'default' => 3, // value to return if the filter fails
2119
     *                                        // other options here
2120
     *                                        'min_range' => 0
2121
     *                                        ),
2122
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
2123
     *                                        );
2124
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
2125
     *                                        // for filter that only accept flags, you can pass them directly
2126
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
2127
     *                                        // for filter that only accept flags, you can also pass as an array
2128
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
2129
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
2130
     *                                        // callback validate filter
2131
     *                                        function foo($value)
2132
     *                                        {
2133
     *                                        // Expected format: Surname, GivenNames
2134
     *                                        if (strpos($value, ", ") === false) return false;
2135
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
2136
     *                                        $empty = (empty($surname) || empty($givennames));
2137
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
2138
     *                                        if ($empty || $notstrings) {
2139
     *                                        return false;
2140
     *                                        } else {
2141
     *                                        return $value;
2142
     *                                        }
2143
     *                                        }
2144
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
2145
     *                                        </code>
2146
     *                                        </p>
2147
     *
2148
     * @psalm-pure
2149
     *
2150
     * @return mixed
2151
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
2152
     */
2153 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // null means "no options", whether it was omitted or passed explicitly.
        // It must not be forwarded: the native filter_var() only accepts
        // array|int for its third parameter, and this file runs with
        // strict_types=1, so an explicit null would raise a TypeError on PHP 8.
        // This matches how filter_input() in this class treats a null $options.
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        // The native result is passed through self::filter() before returning.
        return self::filter($variable);
    }
2169
2170
    /**
2171
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2172
     *
2173
     * Gets multiple variables and optionally filters them.
2174
     *
2175
     * EXAMPLE: <code>
2176
     * $filters = [
2177
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2178
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2179
     *     'email' => FILTER_VALIDATE_EMAIL,
2180
     * ];
2181
     *
2182
     * $data = [
2183
     *     'name' => 'κόσμε',
2184
     *     'age' => '18',
2185
     *     'email' => '[email protected]'
2186
     * ];
2187
     *
2188
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => '[email protected]']
2189
     * </code>
2190
     *
2191
     * @see http://php.net/manual/en/function.filter-var-array.php
2192
     *
2193
     * @param array<mixed>   $data       <p>
2194
     *                                   An array with string keys containing the data to filter.
2195
     *                                   </p>
2196
     * @param array|int|null $definition [optional] <p>
2197
     *                                   An array defining the arguments. A valid key is a string
2198
     *                                   containing a variable name and a valid value is either a
2199
     *                                   filter type, or an
2200
     *                                   array optionally specifying the filter, flags and options.
2201
     *                                   If the value is an array, valid keys are filter
2202
     *                                   which specifies the filter type,
2203
     *                                   flags which specifies any flags that apply to the
2204
     *                                   filter, and options which specifies any options that
2205
     *                                   apply to the filter. See the example below for a better understanding.
2206
     *                                   </p>
2207
     *                                   <p>
2208
     *                                   This parameter can be also an integer holding a filter constant. Then all values
2209
     *                                   in the input array are filtered by this filter.
2210
     *                                   </p>
2211
     * @param bool           $add_empty  [optional] <p>
2212
     *                                   Add missing keys as <b>NULL</b> to the return value.
2213
     *                                   </p>
2214
     *
2215
     * @psalm-pure
2216
     *
2217
     * @return mixed
2218
     *               <p>
2219
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2220
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2221
     *               set.
2222
     *               </p>
2223
     */
2224 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // null means "no definition", whether it was omitted or passed
        // explicitly (e.g. to reach $add_empty). It must not be forwarded: the
        // native filter_var_array() only accepts array|int there, so an
        // explicit null fails (TypeError on PHP 8 under strict_types, false on
        // PHP 7). This matches how filter_input_array() in this class treats a
        // null $definition.
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        // The native result is passed through self::filter() before returning.
        return self::filter($a);
    }
2240
2241
    /**
2242
     * Checks whether finfo is available on the server.
2243
     *
2244
     * @psalm-pure
2245
     *
2246
     * @return bool
2247
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2248
     *
2249
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2250
     */
2251
    public static function finfo_loaded(): bool
    {
        // Availability is decided purely by whether a class named "finfo"
        // can be resolved.
        $finfo_class_is_known = \class_exists(\finfo::class);

        return $finfo_class_is_known;
    }
2255
2256
    /**
2257
     * Returns the first $n characters of the string.
2258
     *
2259
     * @param string $str      <p>The input string.</p>
2260
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2261
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2262
     *
2263
     * @psalm-pure
2264
     *
2265
     * @return string
2266
     */
2267 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // An empty input or a non-positive count yields an empty result.
        $nothing_to_take = $n <= 0 || $str === '';
        if ($nothing_to_take) {
            return '';
        }

        // UTF-8 goes straight to mb_substr(); every other encoding is
        // delegated to self::substr() together with the encoding name.
        $prefix = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $prefix;
    }
2282
2283
    /**
2284
     * Check if the number of Unicode characters isn't greater than the specified integer.
2285
     *
2286
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true</code>
2287
     *
2288
     * @param string $str      the original string to be checked
2289
     * @param int    $box_size the size in number of chars to be checked against string
2290
     *
2291
     * @psalm-pure
2292
     *
2293
     * @return bool
2294
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2295
     */
2296 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by self::strlen(), forced to int before comparing.
        $char_count = (int) self::strlen($str);

        // The string fits when the box is at least as large as the length.
        return $box_size >= $char_count;
    }
2300
2301
    /**
2302
     * Try to fix simple broken UTF-8 strings.
2303
     *
2304
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2305
     *
2306
     * EXAMPLE: <code>UTF8::fix_simple_utf8('Düsseldorf'); // 'Düsseldorf'</code>
2307
     *
2308
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2309
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2310
     * See: http://en.wikipedia.org/wiki/Windows-1252
2311
     *
2312
     * @param string $str <p>The input string</p>
2313
     *
2314
     * @psalm-pure
2315
     *
2316
     * @return string
2317
     */
2318 47
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Search strings (the keys of the fix table), built once per process.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * Replacement strings (the fix table itself), built once per process.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        if ($search_cache === null) {
            // Load the fix table on first use only.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $replace_cache = self::$BROKEN_UTF8_FIX;
            $search_cache = \array_keys($replace_cache);
        }

        \assert(\is_array($replace_cache));

        // Each key of the table is replaced by the value at the same position.
        return \str_replace($search_cache, $replace_cache, $str);
    }
2351
2352
    /**
2353
     * Fix a double (or multiple) encoded UTF8 string.
2354
     *
2355
     * EXAMPLE: <code>UTF8::fix_utf8('Fédération'); // 'Fédération'</code>
2356
     *
2357
     * @param string|string[] $str you can use a string or an array of strings
2358
     *
2359
     * @psalm-pure
2360
     *
2361
     * @return string|string[]
2362
     *                         Will return the fixed input-"array" or
2363
     *                         the fixed input-"string"
2364
     *
2365
     * @psalm-suppress InvalidReturnType
2366
     */
2367 2
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // Arrays are fixed element by element (recursively); keys are kept.
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Repeat the decode / re-encode round trip until it no longer changes
        // the string. An empty input never enters the loop.
        while ($previous !== $current) {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2398
2399
    /**
2400
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2401
     *
2402
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2403
     *
2404
     * @param string $char
2405
     *
2406
     * @psalm-pure
2407
     *
2408
     * @return string
2409
     *                <p>'RTL' or 'LTR'.</p>
2410
     */
2411 2
    public static function getCharDirection(string $char): string
2412
    {
2413 2
        if (self::$SUPPORT['intlChar'] === true) {
2414
            /** @noinspection PhpComposerExtensionStubsInspection */
2415 2
            $tmp_return = \IntlChar::charDirection($char);
2416
2417
            // from "IntlChar"-Class
2418
            $char_direction = [
2419 2
                'RTL' => [1, 13, 14, 15, 21],
2420
                'LTR' => [0, 11, 12, 20],
2421
            ];
2422
2423 2
            if (\in_array($tmp_return, $char_direction['LTR'], true)) {
2424
                return 'LTR';
2425
            }
2426
2427 2
            if (\in_array($tmp_return, $char_direction['RTL'], true)) {
2428 2
                return 'RTL';
2429
            }
2430
        }
2431
2432 2
        $c = static::chr_to_decimal($char);
2433
2434 2
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
2435 2
            return 'LTR';
2436
        }
2437
2438 2
        if ($c <= 0x85e) {
2439 2
            if ($c === 0x5be ||
2440 2
                $c === 0x5c0 ||
2441 2
                $c === 0x5c3 ||
2442 2
                $c === 0x5c6 ||
2443 2
                ($c >= 0x5d0 && $c <= 0x5ea) ||
2444 2
                ($c >= 0x5f0 && $c <= 0x5f4) ||
2445 2
                $c === 0x608 ||
2446 2
                $c === 0x60b ||
2447 2
                $c === 0x60d ||
2448 2
                $c === 0x61b ||
2449 2
                ($c >= 0x61e && $c <= 0x64a) ||
2450
                ($c >= 0x66d && $c <= 0x66f) ||
2451
                ($c >= 0x671 && $c <= 0x6d5) ||
2452
                ($c >= 0x6e5 && $c <= 0x6e6) ||
2453
                ($c >= 0x6ee && $c <= 0x6ef) ||
2454
                ($c >= 0x6fa && $c <= 0x70d) ||
2455
                $c === 0x710 ||
2456
                ($c >= 0x712 && $c <= 0x72f) ||
2457
                ($c >= 0x74d && $c <= 0x7a5) ||
2458
                $c === 0x7b1 ||
2459
                ($c >= 0x7c0 && $c <= 0x7ea) ||
2460
                ($c >= 0x7f4 && $c <= 0x7f5) ||
2461
                $c === 0x7fa ||
2462
                ($c >= 0x800 && $c <= 0x815) ||
2463
                $c === 0x81a ||
2464
                $c === 0x824 ||
2465
                $c === 0x828 ||
2466
                ($c >= 0x830 && $c <= 0x83e) ||
2467
                ($c >= 0x840 && $c <= 0x858) ||
2468 2
                $c === 0x85e
2469
            ) {
2470 2
                return 'RTL';
2471
            }
2472 2
        } elseif ($c === 0x200f) {
2473
            return 'RTL';
2474 2
        } elseif ($c >= 0xfb1d) {
2475 2
            if ($c === 0xfb1d ||
2476 2
                ($c >= 0xfb1f && $c <= 0xfb28) ||
2477 2
                ($c >= 0xfb2a && $c <= 0xfb36) ||
2478 2
                ($c >= 0xfb38 && $c <= 0xfb3c) ||
2479 2
                $c === 0xfb3e ||
2480 2
                ($c >= 0xfb40 && $c <= 0xfb41) ||
2481 2
                ($c >= 0xfb43 && $c <= 0xfb44) ||
2482 2
                ($c >= 0xfb46 && $c <= 0xfbc1) ||
2483 2
                ($c >= 0xfbd3 && $c <= 0xfd3d) ||
2484 2
                ($c >= 0xfd50 && $c <= 0xfd8f) ||
2485 2
                ($c >= 0xfd92 && $c <= 0xfdc7) ||
2486 2
                ($c >= 0xfdf0 && $c <= 0xfdfc) ||
2487 2
                ($c >= 0xfe70 && $c <= 0xfe74) ||
2488 2
                ($c >= 0xfe76 && $c <= 0xfefc) ||
2489 2
                ($c >= 0x10800 && $c <= 0x10805) ||
2490 2
                $c === 0x10808 ||
2491 2
                ($c >= 0x1080a && $c <= 0x10835) ||
2492 2
                ($c >= 0x10837 && $c <= 0x10838) ||
2493 2
                $c === 0x1083c ||
2494 2
                ($c >= 0x1083f && $c <= 0x10855) ||
2495 2
                ($c >= 0x10857 && $c <= 0x1085f) ||
2496 2
                ($c >= 0x10900 && $c <= 0x1091b) ||
2497 2
                ($c >= 0x10920 && $c <= 0x10939) ||
2498 2
                $c === 0x1093f ||
2499 2
                $c === 0x10a00 ||
2500 2
                ($c >= 0x10a10 && $c <= 0x10a13) ||
2501 2
                ($c >= 0x10a15 && $c <= 0x10a17) ||
2502 2
                ($c >= 0x10a19 && $c <= 0x10a33) ||
2503 2
                ($c >= 0x10a40 && $c <= 0x10a47) ||
2504 2
                ($c >= 0x10a50 && $c <= 0x10a58) ||
2505 2
                ($c >= 0x10a60 && $c <= 0x10a7f) ||
2506 2
                ($c >= 0x10b00 && $c <= 0x10b35) ||
2507 2
                ($c >= 0x10b40 && $c <= 0x10b55) ||
2508 2
                ($c >= 0x10b58 && $c <= 0x10b72) ||
2509 2
                ($c >= 0x10b78 && $c <= 0x10b7f)
2510
            ) {
2511 2
                return 'RTL';
2512
            }
2513
        }
2514
2515 2
        return 'LTR';
2516
    }
2517
2518
    /**
     * Read the php-support information collected by this class.
     *
     * @param string|null $key <p>Name of a single support entry, or null for all entries.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // load the transliterator list only once, on first keyed access
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2544
2545
    /**
     * Detect the file type from the first two bytes of a string.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The content to inspect.</p>
     * @param array  $fallback <p>Returned if no type was detected, with these keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with these keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // the signature check needs at least two bytes
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // Concatenate the decimal values of the first two bytes,
        // e.g. 0xFF 0xD8 => "255" . "216" => 255216
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        if ($type_code === 255216) {
            return [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ];
        }

        if ($type_code === 13780) {
            return [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ];
        }

        return $fallback;
    }
2626
2627
    /**
     * Build a random string out of the given set of characters.
     *
     * Each character is picked via random_int(); if that throws an exception,
     * mt_rand() is used for that pick instead.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if $possible_chars is empty.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // "UTF-8" uses the plain "mb_" functions, every other encoding
        // goes through the helpers of this class
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $max_length = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $max_length = (int) self::strlen($possible_chars, $encoding);
        }

        if ($max_length === 0) {
            return '';
        }

        $result = '';
        $added = 0;
        while ($added < $length) {
            try {
                $position = \random_int(0, $max_length - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $max_length - 1);
            }

            $picked = $is_utf8
                ? \mb_substr($possible_chars, $position, 1)
                : self::substr($possible_chars, $position, 1, $encoding);

            // only successful picks count towards the requested length
            if ($picked !== false) {
                $result .= $picked;
                ++$added;
            }
        }

        return $result;
    }
2691
2692
    /**
     * Build a unique string from a random number, the session id, the
     * remote / server address (if set in $_SERVER) and optional extra entropy.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        $upper_bound = \mt_getrandmax();

        try {
            $random_number = \random_int(0, $upper_bound);
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_number = \mt_rand(0, $upper_bound);
        }

        $seed = $random_number .
                \session_id() .
                ($_SERVER['REMOTE_ADDR'] ?? '') .
                ($_SERVER['SERVER_ADDR'] ?? '') .
                $extra_entropy;

        // the seed is used as the uniqid() prefix
        $unique = \uniqid($seed, true);

        if (!$use_md5) {
            return $unique;
        }

        return \md5($unique . $seed);
    }
2721
2722
    /**
     * Alias of "UTF8::string_has_bom()": the call is forwarded unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>The result of "UTF8::string_has_bom()".</p>
     *
     * @see        UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2738
2739
    /**
     * Check if the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without "mbstring" the pattern is handled by the helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2758
2759
    /**
     * Check if the string contains at least one whitespace character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // without "mbstring" the pattern is handled by the helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2778
2779
    /**
     * Check if the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without "mbstring" the pattern is handled by the helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2798
2799
    /**
     * Convert a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // hexdec() is silenced on purpose: it complains about characters
        // it cannot convert (e.g. the "U+" prefix) and ignores them.
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2817
2818
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * Accepted are 4 to 6 hexadecimal digits, optionally prefixed by
     * "\u" or "U+". Everything else is treated as a failure.
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are allowed here: letters beyond "f" would
        // otherwise reach intval(), which stops at the first invalid
        // character and returns a wrong number (e.g. 0) instead of false.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2847
2848
    /**
     * Alias of "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
     *
     * @param string   $str      <p>The input string.</p>
     * @param int|null $flags    [optional] <p>Passed on as $flags.</p>
     * @param string   $encoding [optional] <p>Passed on as $encoding.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of "UTF8::html_entity_decode()".</p>
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2869
2870
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_entity_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with $keep_ascii_chars only code points from 0x80 upwards are converted
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // One convmap entry consists of exactly four values:
            // [first code point, last code point, offset, mask].
            // A stray fifth element makes the map invalid (its size must be a
            // multiple of 4), so it is not passed anymore.
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2941
2942
    /**
2943
     * UTF-8 version of html_entity_decode()
2944
     *
2945
     * The reason we are not using html_entity_decode() by itself is because
2946
     * while it is not technically correct to leave out the semicolon
2947
     * at the end of an entity most browsers will still interpret the entity
2948
     * correctly. html_entity_decode() does not convert entities without
2949
     * semicolons, so we are left with our own little solution here. Bummer.
2950
     *
2951
     * Convert all HTML entities to their applicable characters.
2952
     *
2953
     * INFO: opposite to UTF8::html_encode()
2954
     *
2955
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2956
     *
2957
     * @see http://php.net/manual/en/function.html-entity-decode.php
2958
     *
2959
     * @param string   $str      <p>
2960
     *                           The input string.
2961
     *                           </p>
2962
     * @param int|null $flags    [optional] <p>
2963
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2964
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2965
     *                           <table>
2966
     *                           Available <i>flags</i> constants
2967
     *                           <tr valign="top">
2968
     *                           <td>Constant Name</td>
2969
     *                           <td>Description</td>
2970
     *                           </tr>
2971
     *                           <tr valign="top">
2972
     *                           <td><b>ENT_COMPAT</b></td>
2973
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2974
     *                           </tr>
2975
     *                           <tr valign="top">
2976
     *                           <td><b>ENT_QUOTES</b></td>
2977
     *                           <td>Will convert both double and single quotes.</td>
2978
     *                           </tr>
2979
     *                           <tr valign="top">
2980
     *                           <td><b>ENT_NOQUOTES</b></td>
2981
     *                           <td>Will leave both double and single quotes unconverted.</td>
2982
     *                           </tr>
2983
     *                           <tr valign="top">
2984
     *                           <td><b>ENT_HTML401</b></td>
2985
     *                           <td>
2986
     *                           Handle code as HTML 4.01.
2987
     *                           </td>
2988
     *                           </tr>
2989
     *                           <tr valign="top">
2990
     *                           <td><b>ENT_XML1</b></td>
2991
     *                           <td>
2992
     *                           Handle code as XML 1.
2993
     *                           </td>
2994
     *                           </tr>
2995
     *                           <tr valign="top">
2996
     *                           <td><b>ENT_XHTML</b></td>
2997
     *                           <td>
2998
     *                           Handle code as XHTML.
2999
     *                           </td>
3000
     *                           </tr>
3001
     *                           <tr valign="top">
3002
     *                           <td><b>ENT_HTML5</b></td>
3003
     *                           <td>
3004
     *                           Handle code as HTML 5.
3005
     *                           </td>
3006
     *                           </tr>
3007
     *                           </table>
3008
     *                           </p>
3009
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3010
     *
3011
     * @psalm-pure
3012
     *
3013
     * @return string the decoded string
3014
     */
3015 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to decode if the string is shorter than four bytes
        // (examples: &; || &x;) or contains no "&" at all.
        $has_no_entity = !isset($str[3]) || \strpos($str, '&') === false;
        if ($has_no_entity) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        /** @noinspection InArrayCanBeUsedInspection */
        $is_common_encoding = $encoding === 'UTF-8'
                              ||
                              $encoding === 'ISO-8859-1'
                              ||
                              $encoding === 'WINDOWS-1252';
        if (!$is_common_encoding && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly, so that multiple-encoded entities are resolved
        // too; stop as soon as a pass does not change the string anymore.
        do {
            $previous = $str;

            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // append the missing ";" to numeric entities first
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($previous !== $str);

        return $str;
    }
3075
3076
    /**
     * Escape a string for html output via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES and ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3094
3095
    /**
     * Remove empty html-tags.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * An opening tag that is followed only by optional whitespace and a
     * closing tag is removed; the tag names are not compared.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $without_empty_tags = \preg_replace('/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u', '', $str);

        return (string) $without_empty_tags;
    }
3114
3115
    /**
3116
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3117
     *
3118
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3119
     *
3120
     * @see http://php.net/manual/en/function.htmlentities.php
3121
     *
3122
     * @param string $str           <p>
3123
     *                              The input string.
3124
     *                              </p>
3125
     * @param int    $flags         [optional] <p>
3126
     *                              A bitmask of one or more of the following flags, which specify how to handle
3127
     *                              quotes, invalid code unit sequences and the used document type. The default is
3128
     *                              ENT_COMPAT | ENT_HTML401.
3129
     *                              <table>
3130
     *                              Available <i>flags</i> constants
3131
     *                              <tr valign="top">
3132
     *                              <td>Constant Name</td>
3133
     *                              <td>Description</td>
3134
     *                              </tr>
3135
     *                              <tr valign="top">
3136
     *                              <td><b>ENT_COMPAT</b></td>
3137
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3138
     *                              </tr>
3139
     *                              <tr valign="top">
3140
     *                              <td><b>ENT_QUOTES</b></td>
3141
     *                              <td>Will convert both double and single quotes.</td>
3142
     *                              </tr>
3143
     *                              <tr valign="top">
3144
     *                              <td><b>ENT_NOQUOTES</b></td>
3145
     *                              <td>Will leave both double and single quotes unconverted.</td>
3146
     *                              </tr>
3147
     *                              <tr valign="top">
3148
     *                              <td><b>ENT_IGNORE</b></td>
3149
     *                              <td>
3150
     *                              Silently discard invalid code unit sequences instead of returning
3151
     *                              an empty string. Using this flag is discouraged as it
3152
     *                              may have security implications.
3153
     *                              </td>
3154
     *                              </tr>
3155
     *                              <tr valign="top">
3156
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3157
     *                              <td>
3158
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3159
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3160
     *                              string.
3161
     *                              </td>
3162
     *                              </tr>
3163
     *                              <tr valign="top">
3164
     *                              <td><b>ENT_DISALLOWED</b></td>
3165
     *                              <td>
3166
     *                              Replace invalid code points for the given document type with a
3167
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3168
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3169
     *                              instance, to ensure the well-formedness of XML documents with
3170
     *                              embedded external content.
3171
     *                              </td>
3172
     *                              </tr>
3173
     *                              <tr valign="top">
3174
     *                              <td><b>ENT_HTML401</b></td>
3175
     *                              <td>
3176
     *                              Handle code as HTML 4.01.
3177
     *                              </td>
3178
     *                              </tr>
3179
     *                              <tr valign="top">
3180
     *                              <td><b>ENT_XML1</b></td>
3181
     *                              <td>
3182
     *                              Handle code as XML 1.
3183
     *                              </td>
3184
     *                              </tr>
3185
     *                              <tr valign="top">
3186
     *                              <td><b>ENT_XHTML</b></td>
3187
     *                              <td>
3188
     *                              Handle code as XHTML.
3189
     *                              </td>
3190
     *                              </tr>
3191
     *                              <tr valign="top">
3192
     *                              <td><b>ENT_HTML5</b></td>
3193
     *                              <td>
3194
     *                              Handle code as HTML 5.
3195
     *                              </td>
3196
     *                              </tr>
3197
     *                              </table>
3198
     *                              </p>
3199
     * @param string $encoding      [optional] <p>
3200
     *                              Like <b>htmlspecialchars</b>,
3201
     *                              <b>htmlentities</b> takes an optional third argument
3202
     *                              <i>encoding</i> which defines encoding used in
3203
     *                              conversion.
3204
     *                              Although this argument is technically optional, you are highly
3205
     *                              encouraged to specify the correct value for your code.
3206
     *                              </p>
3207
     * @param bool   $double_encode [optional] <p>
3208
     *                              When <i>double_encode</i> is turned off PHP will not
3209
     *                              encode existing html entities. The default is to convert everything.
3210
     *                              </p>
3211
     *
3212
     * @psalm-pure
3213
     *
3214
     * @return string
3215
     *                <p>
3216
     *                The encoded string.
3217
     *                <br><br>
3218
     *                If the input <i>string</i> contains an invalid code unit
3219
     *                sequence within the given <i>encoding</i> an empty string
3220
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3221
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3222
     *                </p>
3223
     */
3224 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as-is; every other name is normalized first.
        $skip_normalization = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$skip_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // The native htmlentities() leaves backslashes untouched, so they are
        // replaced by their numeric html entity here.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
3253
3254
    /**
3255
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3256
     *
3257
     * INFO: Take a look at "UTF8::htmlentities()"
3258
     *
3259
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3260
     *
3261
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3262
     *
3263
     * @param string $str           <p>
3264
     *                              The string being converted.
3265
     *                              </p>
3266
     * @param int    $flags         [optional] <p>
3267
     *                              A bitmask of one or more of the following flags, which specify how to handle
3268
     *                              quotes, invalid code unit sequences and the used document type. The default is
3269
     *                              ENT_COMPAT | ENT_HTML401.
3270
     *                              <table>
3271
     *                              Available <i>flags</i> constants
3272
     *                              <tr valign="top">
3273
     *                              <td>Constant Name</td>
3274
     *                              <td>Description</td>
3275
     *                              </tr>
3276
     *                              <tr valign="top">
3277
     *                              <td><b>ENT_COMPAT</b></td>
3278
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3279
     *                              </tr>
3280
     *                              <tr valign="top">
3281
     *                              <td><b>ENT_QUOTES</b></td>
3282
     *                              <td>Will convert both double and single quotes.</td>
3283
     *                              </tr>
3284
     *                              <tr valign="top">
3285
     *                              <td><b>ENT_NOQUOTES</b></td>
3286
     *                              <td>Will leave both double and single quotes unconverted.</td>
3287
     *                              </tr>
3288
     *                              <tr valign="top">
3289
     *                              <td><b>ENT_IGNORE</b></td>
3290
     *                              <td>
3291
     *                              Silently discard invalid code unit sequences instead of returning
3292
     *                              an empty string. Using this flag is discouraged as it
3293
     *                              may have security implications.
3294
     *                              </td>
3295
     *                              </tr>
3296
     *                              <tr valign="top">
3297
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3298
     *                              <td>
3299
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3300
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3301
     *                              string.
3302
     *                              </td>
3303
     *                              </tr>
3304
     *                              <tr valign="top">
3305
     *                              <td><b>ENT_DISALLOWED</b></td>
3306
     *                              <td>
3307
     *                              Replace invalid code points for the given document type with a
3308
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3309
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3310
     *                              instance, to ensure the well-formedness of XML documents with
3311
     *                              embedded external content.
3312
     *                              </td>
3313
     *                              </tr>
3314
     *                              <tr valign="top">
3315
     *                              <td><b>ENT_HTML401</b></td>
3316
     *                              <td>
3317
     *                              Handle code as HTML 4.01.
3318
     *                              </td>
3319
     *                              </tr>
3320
     *                              <tr valign="top">
3321
     *                              <td><b>ENT_XML1</b></td>
3322
     *                              <td>
3323
     *                              Handle code as XML 1.
3324
     *                              </td>
3325
     *                              </tr>
3326
     *                              <tr valign="top">
3327
     *                              <td><b>ENT_XHTML</b></td>
3328
     *                              <td>
3329
     *                              Handle code as XHTML.
3330
     *                              </td>
3331
     *                              </tr>
3332
     *                              <tr valign="top">
3333
     *                              <td><b>ENT_HTML5</b></td>
3334
     *                              <td>
3335
     *                              Handle code as HTML 5.
3336
     *                              </td>
3337
     *                              </tr>
3338
     *                              </table>
3339
     *                              </p>
3340
     * @param string $encoding      [optional] <p>
3341
     *                              Defines encoding used in conversion.
3342
     *                              </p>
3343
     *                              <p>
3344
     *                              For the purposes of this function, the encodings
3345
     *                              ISO-8859-1, ISO-8859-15,
3346
     *                              UTF-8, cp866,
3347
     *                              cp1251, cp1252, and
3348
     *                              KOI8-R are effectively equivalent, provided the
3349
     *                              <i>string</i> itself is valid for the encoding, as
3350
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3351
     *                              the same positions in all of these encodings.
3352
     *                              </p>
3353
     * @param bool   $double_encode [optional] <p>
3354
     *                              When <i>double_encode</i> is turned off PHP will not
3355
     *                              encode existing html entities, the default is to convert everything.
3356
     *                              </p>
3357
     *
3358
     * @psalm-pure
3359
     *
3360
     * @return string the converted string.
3361
     *                </p>
3362
     *                <p>
3363
     *                If the input <i>string</i> contains an invalid code unit
3364
     *                sequence within the given <i>encoding</i> an empty string
3365
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3366
     *                <b>ENT_SUBSTITUTE</b> flags are set
3367
     */
3368 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as-is; every other name is normalized first.
        $skip_normalization = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$skip_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3385
3386
    /**
     * Reports whether the "iconv" PHP extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3400
3401
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::decimal_to_chr()".
     *
     * @param int|string $int <p>Value handed over to "UTF8::decimal_to_chr()".</p>
     *
     * @psalm-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever "UTF8::decimal_to_chr()" returns.</p>
     *
     * @see        UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        // Pure delegation, kept for backward compatibility only.
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3419
3420
    /**
     * Converts an integer into its hexadecimal code point notation (e.g. "U+xxxx").
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text placed in front of the hex digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the hex digits, zero-padded on the left to at least 4 digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Values with fewer than four hex digits are left-padded with "0"; longer ones stay untouched.
        $padded_hex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $padded_hex;
    }
3442
3443
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the class exists, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intlChar_loaded(): bool
    {
        $class_is_known = \class_exists('IntlChar');

        return $class_is_known;
    }
3457
3458
    /**
     * Reports whether the "intl" PHP extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3472
3473
    /**
     * Deprecated alias: forwards the string to "ASCII::is_ascii()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whatever "ASCII::is_ascii()" returns.</p>
     *
     * @see        UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        // Pure delegation, kept for backward compatibility only.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3489
3490
    /**
     * Deprecated alias: forwards the argument to "UTF8::is_base64()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whatever "UTF8::is_base64()" returns.</p>
     *
     * @see        UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        // Pure delegation, kept for backward compatibility only.
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3506
3507
    /**
     * Deprecated alias: forwards both arguments to "UTF8::is_binary()".
     *
     * @param int|string $str    <p>The input to check.</p>
     * @param bool       $strict <p>Handed over unchanged to "UTF8::is_binary()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whatever "UTF8::is_binary()" returns.</p>
     *
     * @see        UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, bool $strict = false): bool
    {
        // Pure delegation, kept for backward compatibility only.
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3524
3525
    /**
     * Deprecated alias: forwards the string to "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whatever "UTF8::is_bom()" returns.</p>
     *
     * @see        UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        // Pure delegation, kept for backward compatibility only.
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3541
3542
    /**
     * Deprecated alias: forwards the string to "UTF8::is_html()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whatever "UTF8::is_html()" returns.</p>
     *
     * @see        UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        // Pure delegation, kept for backward compatibility only.
        $is_html = self::is_html($str);

        return $is_html;
    }
3558
3559
    /**
     * Deprecated alias: forwards the string to "UTF8::is_json()" (using its default options).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whatever "UTF8::is_json()" returns.</p>
     *
     * @see        UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        // Pure delegation, kept for backward compatibility only.
        $is_json = self::is_json($str);

        return $is_json;
    }
3573
3574
    /**
     * Deprecated alias: forwards the string to "UTF8::is_utf16()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see        UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        // Pure delegation, kept for backward compatibility only.
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3593
3594
    /**
     * Deprecated alias: forwards the string to "UTF8::is_utf32()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see        UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        // Pure delegation, kept for backward compatibility only.
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3613
3614
    /**
     * Deprecated alias: forwards both arguments to "UTF8::is_utf8()".
     *
     * @param string $str    <p>The input string.</p>
     * @param bool   $strict <p>Handed over unchanged to "UTF8::is_utf8()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whatever "UTF8::is_utf8()" returns.</p>
     *
     * @see        UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, bool $strict = false): bool
    {
        // Pure delegation, kept for backward compatibility only.
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3631
3632
    /**
     * Checks the string against the POSIX "[:alpha:]" class (alphabetic chars only).
     *
     * The pattern uses "*", i.e. it does not require at least one character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring support, fall back to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3651
3652
    /**
     * Checks the string against the POSIX "[:alnum:]" class (alphabetic and numeric chars only).
     *
     * The pattern uses "*", i.e. it does not require at least one character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring support, fall back to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3671
3672
    /**
     * Checks the string against the POSIX "[:punct:]" class (punctuation chars only).
     *
     * The pattern uses "*", i.e. it does not require at least one character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3686
3687
    /**
     * Checks whether the string consists of printable (non-invisible) chars only.
     *
     * The string counts as printable when "UTF8::remove_invisible_characters()" returns it unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str): bool
    {
        $without_invisible_chars = self::remove_invisible_characters($str);

        return $without_invisible_chars === $str;
    }
3701
3702
    /**
     * Checks if a string is 7 bit ASCII (delegates to "ASCII::is_ascii()").
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3721
3722
    /**
     * Checks whether the input is a base64 encoded string.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string; non-string values are never valid.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters from outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: re-encoding the decoded bytes must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3753
3754
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The checks run in this order, the first hit wins:
     * only "0"/"1" characters, "binary" reported by "UTF8::get_file_type()",
     * more than 25% NUL bytes and - in strict mode only - "binary" reported by ext-fileinfo.
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input  <p>The input; it is cast to string before checking.</p>
     * @param bool       $strict <p>Additionally ask ext-fileinfo for the mime-encoding.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if $strict is used, no earlier check matched and ext-fileinfo is not available
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made up of "0" and "1" only is treated as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than a quarter of NUL bytes is treated as binary ($input is not empty here).
        $byte_length = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_length);
        if (($null_byte_count / $byte_length) > 0.25) {
            return true;
        }

        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $finfo_encoding === 'binary';
    }
3805
3806
    /**
     * Check if the file is binary, based on (at most) its first 512 bytes.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file <p>Path handed over to fopen().</p>
     *
     * @return bool
     *              <p><strong>false</strong> if the file can't be opened or read, or if it is empty;
     *              otherwise the result of the strict "UTF8::is_binary()" check.</p>
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        // Only the leading block of the file is inspected.
        $leading_block = \fread($handle, 512);
        \fclose($handle);

        if ($leading_block === false || $leading_block === '') {
            return false;
        }

        return self::is_binary($leading_block, true);
    }
3832
3833
    /**
     * Checks the string against the POSIX "[:space:]" class (whitespace chars only).
     *
     * The pattern uses "*", i.e. it does not require at least one character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // Without mbstring support, fall back to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3852
3853
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string; non-string values are never a BOM.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if $str is exactly one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // The BOM byte sequences are the keys of the static table, so a plain key lookup
        // is enough - no (by-reference) iteration over the shared table is needed.
        return \is_string($str) && \array_key_exists($str, self::$BOM);
    }
3878
3879
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it equals FALSE when converted to bool
     * (e.g. '', '0', 0, 0.0 or an empty array).
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // For an existing variable, empty() is the same as a negated bool conversion.
        return !$str;
    }
3896
3897
    /**
     * Checks the string against the POSIX "[:xdigit:]" class (hexadecimal chars only).
     *
     * The pattern uses "*", i.e. it does not require at least one character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring support, fall back to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3916
3917
    /**
     * Check if the string contains any HTML tags.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $emoji_encoded = self::emoji_encode($str);

        // One opening, closing or self-closing tag (with optional attributes) is enough.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        return \preg_match($tag_pattern, $emoji_encoded) === 1;
    }
3944
3945
    /**
     * Check if $url is a correct "http://" or "https://" url.
     *
     * Other schemes are always rejected. With $disallow_localhost, urls that point
     * to the local machine ("localhost", "*.localhost", "127.0.0.1", "::1" and "[::1]") are rejected as well.
     *
     * @param string $url                <p>The url to check.</p>
     * @param bool   $disallow_localhost <p>Reject urls that start with a localhost / loopback host.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            $local_prefixes = [
                'http://localhost',
                'https://localhost',
                'http://127.0.0.1',
                'https://127.0.0.1',
                'http://::1',
                'https://::1',
                // IPv6 literals in a url host are written in square brackets (RFC 3986),
                // without these entries "http://[::1]/" passes the FILTER_VALIDATE_URL fallback below.
                'http://[::1]',
                'https://[::1]',
            ];
            if (self::str_istarts_with_any($url, $local_prefixes)) {
                return false;
            }

            $localhost_subdomain_regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($localhost_subdomain_regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        // NOTE(review): the pattern has no end anchor, so it only validates the beginning of the url;
        //               "(?:\d+)?" looks like it was meant to match a ":port" - confirm before changing it.
        $idn_regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($idn_regex, $url)) {
            return true;
        }

        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
4000
4001
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A decoded null is only acceptable if the input was the null literal itself (any letter case).
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_array_or_object = \is_object($decoded) || \is_array($decoded);
            if (!$is_array_or_object) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4041
4042
    /**
     * Checks the string against the POSIX "[:lower:]" class (lowercase chars only).
     *
     * The pattern uses "*", i.e. it does not require at least one character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // Without mbstring support, fall back to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4059
4060
    /**
4061
     * Returns true if the string is serialized, false otherwise.
4062
     *
4063
     * @param string $str <p>The input string.</p>
4064
     *
4065
     * @psalm-pure
4066
     *
4067
     * @return bool
4068
     *              <p>Whether or not $str is serialized.</p>
4069
     */
4070 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of boolean false; unserialize() would
        // return false for it, which is indistinguishable from a failure.
        if ($str === 'b:0;') {
            return true;
        }

        // This method only needs to know whether the payload is well-formed, so
        // object instantiation is disabled: with "allowed_classes => false" any
        // serialized object becomes a __PHP_Incomplete_Class instead of a real
        // instance, and no user-defined magic methods run on untrusted input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        $result = @\unserialize($str, ['allowed_classes' => false]);

        return $result !== false;
    }
4082
4083
    /**
4084
     * Returns true if the string contains only upper case chars, false
4085
     * otherwise.
4086
     *
4087
     * @param string $str <p>The input string.</p>
4088
     *
4089
     * @psalm-pure
4090
     *
4091
     * @return bool
4092
     *              <p>Whether or not $str contains only upper case characters.</p>
4093
     */
4094 8
    public static function is_uppercase(string $str): bool
    {
        // Matches the empty string as well as any run of uppercase characters.
        $uppercase_only = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // Fallback path when the mbstring extension is not available.
            return self::str_matches_pattern($str, $uppercase_only);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($uppercase_only, $str);
    }
4103
4104
    /**
4105
     * Check if the string is UTF-16.
4106
     *
4107
     * EXAMPLE: <code>
4108
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
4109
     * //
4110
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
4111
     * //
4112
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
4113
     * </code>
4114
     *
4115
     * @param string $str                       <p>The input string.</p>
4116
     * @param bool   $check_if_string_is_binary
4117
     *
4118
     * @psalm-pure
4119
     *
4120
     * @return false|int
4121
     *                   <strong>false</strong> if it's not UTF-16,<br>
4122
     *                   <strong>1</strong> for UTF-16LE,<br>
4123
     *                   <strong>2</strong> for UTF-16BE
4124
     */
4125 22
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // Optionally bail out early when the input does not look like binary data.
        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score both byte orders with the same procedure: decode to UTF-8,
        // re-encode, decode again, and only count characters when the round
        // trip is stable.
        $scores = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $score = 0;

            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if ($decoded) {
                $re_encoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
                $round_trip = \mb_convert_encoding($re_encoded, 'UTF-8', $candidate);

                if ($round_trip === $decoded) {
                    // Computed lazily and shared between both candidates.
                    if ($str_chars === []) {
                        $str_chars = self::count_chars($str, true, false);
                    }

                    // Only the keys are needed, so iterate by value: a function
                    // result is a temporary and is not a valid reference target.
                    // NOTE(review): in_array() searches the *values* of $str_chars;
                    // kept exactly as before - confirm this heuristic is intended.
                    foreach (self::count_chars($round_trip) as $char => $unused_count) {
                        if (\in_array($char, $str_chars, true)) {
                            ++$score;
                        }
                    }
                }
            }

            $scores[$candidate] = $score;
        }

        $maybe_utf16le = $scores['UTF-16LE'];
        $maybe_utf16be = $scores['UTF-16BE'];

        // A tie (including 0 vs. 0) means the byte order cannot be determined.
        if ($maybe_utf16be !== $maybe_utf16le) {
            if ($maybe_utf16le > $maybe_utf16be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4197
4198
    /**
4199
     * Check if the string is UTF-32.
4200
     *
4201
     * EXAMPLE: <code>
4202
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
4203
     * //
4204
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
4205
     * //
4206
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
4207
     * </code>
4208
     *
4209
     * @param string $str                       <p>The input string.</p>
4210
     * @param bool   $check_if_string_is_binary
4211
     *
4212
     * @psalm-pure
4213
     *
4214
     * @return false|int
4215
     *                   <strong>false</strong> if it's not UTF-32,<br>
4216
     *                   <strong>1</strong> for UTF-32LE,<br>
4217
     *                   <strong>2</strong> for UTF-32BE
4218
     */
4219 20
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // Optionally bail out early when the input does not look like binary data.
        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score both byte orders with the same procedure: decode to UTF-8,
        // re-encode, decode again, and only count characters when the round
        // trip is stable.
        $scores = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $score = 0;

            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if ($decoded) {
                $re_encoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
                $round_trip = \mb_convert_encoding($re_encoded, 'UTF-8', $candidate);

                if ($round_trip === $decoded) {
                    // Computed lazily and shared between both candidates.
                    if ($str_chars === []) {
                        $str_chars = self::count_chars($str, true, false);
                    }

                    // Only the keys are needed, so iterate by value: a function
                    // result is a temporary and is not a valid reference target.
                    // NOTE(review): in_array() searches the *values* of $str_chars;
                    // kept exactly as before - confirm this heuristic is intended.
                    foreach (self::count_chars($round_trip) as $char => $unused_count) {
                        if (\in_array($char, $str_chars, true)) {
                            ++$score;
                        }
                    }
                }
            }

            $scores[$candidate] = $score;
        }

        $maybe_utf32le = $scores['UTF-32LE'];
        $maybe_utf32be = $scores['UTF-32BE'];

        // A tie (including 0 vs. 0) means the byte order cannot be determined.
        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4291
4292
    /**
4293
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
4294
     *
4295
     * EXAMPLE: <code>
4296
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
4297
     * //
4298
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
4299
     * </code>
4300
     *
4301
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
4302
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
4303
     *
4304
     * @psalm-pure
4305
     *
4306
     * @return bool
4307
     */
4308 83
    public static function is_utf8($str, bool $strict = false): bool
    {
        // Scalars (and null) are cast to string and checked directly.
        if (!\is_array($str)) {
            return self::is_utf8_string((string) $str, $strict);
        }

        // Arrays are valid only if every element is valid; elements are checked
        // recursively, so nested arrays are handled too. Iteration is by value:
        // nothing is modified, so a reference is not needed.
        foreach ($str as $value) {
            if (!self::is_utf8($value, $strict)) {
                return false;
            }
        }

        return true;
    }
4322
4323
    /**
4324
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4325
     * Decodes a JSON string
4326
     *
4327
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
4328
     *
4329
     * @see http://php.net/manual/en/function.json-decode.php
4330
     *
4331
     * @param string $json    <p>
4332
     *                        The <i>json</i> string being decoded.
4333
     *                        </p>
4334
     *                        <p>
4335
     *                        This function only works with UTF-8 encoded strings.
4336
     *                        </p>
4337
     *                        <p>PHP implements a superset of
4338
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4339
     *                        only supports these values when they are nested inside an array or an object.
4340
     *                        </p>
4341
     * @param bool   $assoc   [optional] <p>
4342
     *                        When <b>TRUE</b>, returned objects will be converted into
4343
     *                        associative arrays.
4344
     *                        </p>
4345
     * @param int    $depth   [optional] <p>
4346
     *                        User specified recursion depth.
4347
     *                        </p>
4348
     * @param int    $options [optional] <p>
4349
     *                        Bitmask of JSON decode options. Currently only
4350
     *                        <b>JSON_BIGINT_AS_STRING</b>
4351
     *                        is supported (default is to cast large integers as floats)
4352
     *                        </p>
4353
     *
4354
     * @psalm-pure
4355
     *
4356
     * @return mixed
4357
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
4358
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
4359
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
4360
     *               is deeper than the recursion limit.</p>
4361
     */
4362 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is passed through self::filter() before decoding.
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4377
4378
    /**
4379
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4380
     * Returns the JSON representation of a value.
4381
     *
4382
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
4383
     *
4384
     * @see http://php.net/manual/en/function.json-encode.php
4385
     *
4386
     * @param mixed $value   <p>
4387
     *                       The <i>value</i> being encoded. Can be any type except
4388
     *                       a resource.
4389
     *                       </p>
4390
     *                       <p>
4391
     *                       All string data must be UTF-8 encoded.
4392
     *                       </p>
4393
     *                       <p>PHP implements a superset of
4394
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4395
     *                       only supports these values when they are nested inside an array or an object.
4396
     *                       </p>
4397
     * @param int   $options [optional] <p>
4398
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
4399
     *                       <b>JSON_HEX_TAG</b>,
4400
     *                       <b>JSON_HEX_AMP</b>,
4401
     *                       <b>JSON_HEX_APOS</b>,
4402
     *                       <b>JSON_NUMERIC_CHECK</b>,
4403
     *                       <b>JSON_PRETTY_PRINT</b>,
4404
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
4405
     *                       <b>JSON_FORCE_OBJECT</b>,
4406
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
4407
     *                       constants is described on
4408
     *                       the JSON constants page.
4409
     *                       </p>
4410
     * @param int   $depth   [optional] <p>
4411
     *                       Set the maximum depth. Must be greater than zero.
4412
     *                       </p>
4413
     *
4414
     * @psalm-pure
4415
     *
4416
     * @return false|string
4417
     *                      A JSON encoded <strong>string</strong> on success or<br>
4418
     *                      <strong>FALSE</strong> on failure
4419
     */
4420 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is passed through self::filter() before encoding.
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4431
4432
    /**
4433
     * Checks whether JSON is available on the server.
4434
     *
4435
     * @psalm-pure
4436
     *
4437
     * @return bool
4438
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4439
     *
4440
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4441
     */
4442
    public static function json_loaded(): bool
    {
        // JSON support is detected via the presence of the json_decode() function.
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4446
4447
    /**
4448
     * Makes string's first char lowercase.
4449
     *
4450
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
4451
     *
4452
     * @param string      $str                           <p>The input string</p>
4453
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
4454
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4455
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4456
     *                                                   tr</p>
4457
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4458
     *                                                   -> ß</p>
4459
     *
4460
     * @psalm-pure
4461
     *
4462
     * @return string the resulting string
4463
     */
4464 46
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Non-UTF-8 encodings always go through the library's own helpers.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $first_char = (string) self::substr($str, 0, 1, $encoding);

            $lowered = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $lowered . $remainder;
        }

        $remainder = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // The plain mb_* call is only used when no language-specific rule and
        // no length-preserving behaviour was requested.
        $plain_mb_is_enough = $lang === null && !$try_to_keep_the_string_length;

        if ($plain_mb_is_enough) {
            $lowered = \mb_strtolower($first_char);
        } else {
            $lowered = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $lowered . $remainder;
    }
4509
4510
    /**
4511
     * alias for "UTF8::lcfirst()"
4512
     *
4513
     * @param string      $str
4514
     * @param string      $encoding
4515
     * @param bool        $clean_utf8
4516
     * @param string|null $lang
4517
     * @param bool        $try_to_keep_the_string_length
4518
     *
4519
     * @psalm-pure
4520
     *
4521
     * @return string
4522
     *
4523
     * @see        UTF8::lcfirst()
4524
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
4525
     */
4526 2
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: forwards every argument unchanged to lcfirst().
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4541
4542
    /**
4543
     * Lowercase for all words in the string.
4544
     *
4545
     * @param string      $str                           <p>The input string.</p>
4546
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
4547
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
4548
     *                                                   not start a new word.</p>
4549
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
4550
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4551
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4552
     *                                                   tr</p>
4553
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4554
     *                                                   -> ß</p>
4555
     *
4556
     * @psalm-pure
4557
     *
4558
     * @return string
4559
     */
4560 2
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a loose truthiness check would also
        // treat the string "0" as empty.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the parts are only read, never modified.
        foreach ($words as $word) {
            // Skip empty parts only; a part consisting of "0" is kept.
            if ($word === '') {
                continue;
            }

            // Words listed in $exceptions are appended untouched.
            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4595
4596
    /**
4597
     * alias for "UTF8::lcfirst()"
4598
     *
4599
     * @param string      $str
4600
     * @param string      $encoding
4601
     * @param bool        $clean_utf8
4602
     * @param string|null $lang
4603
     * @param bool        $try_to_keep_the_string_length
4604
     *
4605
     * @psalm-pure
4606
     *
4607
     * @return string
4608
     *
4609
     * @see        UTF8::lcfirst()
4610
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
4611
     */
4612 5
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: forwards every argument unchanged to lcfirst().
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4627
4628
    /**
4629
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
4630
     *
4631
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
4632
     *
4633
     * @param string      $str   <p>The string to be trimmed</p>
4634
     * @param string|null $chars <p>Optional characters to be stripped</p>
4635
     *
4636
     * @psalm-pure
4637
     *
4638
     * @return string the string with unwanted characters stripped from the left
4639
     */
4640 23
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means there is nothing to strip; without this
        // guard the pattern would contain the invalid character class "[]".
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // The fallback path additionally escapes the "/" delimiter.
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$var}" replaces the "${var}" form, which is deprecated since PHP 8.2.
            $pattern = "^[{$quoted_chars}]+";
        } else {
            // Default: strip leading whitespace.
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4668
4669
    /**
4670
     * Returns the UTF-8 character with the maximum code point in the given data.
4671
     *
4672
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
4673
     *
4674
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
4675
     *
4676
     * @psalm-pure
4677
     *
4678
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
4679
     */
4680 2
    public static function max($arg)
    {
        // An array of strings is concatenated and treated as one string.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // No code points means there is no maximum to report.
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4695
4696
    /**
4697
     * Calculates and returns the maximum number of bytes taken by any
4698
     * UTF-8 encoded character in the given string.
4699
     *
4700
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
4701
     *
4702
     * @param string $str <p>The original Unicode string.</p>
4703
     *
4704
     * @psalm-pure
4705
     *
4706
     * @return int
4707
     *             <p>Max byte lengths of the given chars.</p>
4708
     */
4709 2
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        // With no sizes to compare, the width is reported as 0.
        return $byte_sizes === [] ? 0 : (int) \max($byte_sizes);
    }
4718
4719
    /**
4720
     * Checks whether mbstring is available on the server.
4721
     *
4722
     * @psalm-pure
4723
     *
4724
     * @return bool
4725
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4726
     *
4727
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4728
     */
4729 26
    public static function mbstring_loaded(): bool
    {
        // Availability is determined by whether the "mbstring" extension is loaded.
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4733
4734
    /**
4735
     * Returns the UTF-8 character with the minimum code point in the given data.
4736
     *
4737
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
4738
     *
4739
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
4740
     *
4741
     * @psalm-pure
4742
     *
4743
     * @return string|null
4744
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
4745
     */
4746 2
    public static function min($arg)
    {
        // An array of strings is concatenated and treated as one string.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // No code points means there is no minimum to report.
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4761
4762
    /**
4763
     * alias for "UTF8::normalize_encoding()"
4764
     *
4765
     * @param mixed $encoding
4766
     * @param mixed $fallback
4767
     *
4768
     * @psalm-pure
4769
     *
4770
     * @return mixed
4771
     *
4772
     * @see        UTF8::normalize_encoding()
4773
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
4774
     */
4775 2
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        // Deprecated alias: forwards both arguments unchanged to normalize_encoding().
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4779
4780
    /**
     * Map an encoding name, as typed by a user, onto its canonical spelling.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * Lookup order: hard-coded shortcuts, the per-request result cache, the list of
     * known encodings and finally an alias table that is matched against the
     * upper-cased name stripped of everything except A-Z and 0-9.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>The $fallback (empty string by default)
     *                      is returned for empty input and for '0' / '1'. Names that are not recognized are
     *                      returned upper-cased.</p>
     *
     * @template TNormalizeEncodingFallback
     * @psalm-param string|TNormalizeEncodingFallback $fallback
     * @psalm-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $NORMALIZED_NAME_CACHE = [];

        $encoding = (string) $encoding;

        // empty input (the string '0' is falsy as well) -> hand back the fallback
        if (!$encoding) {
            return $fallback;
        }

        // shortcuts for the names that are requested most often
        $shortcuts = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($shortcuts[$encoding])) {
            return $shortcuts[$encoding];
        }

        // only a fallback, for non "strict_types" usage (e.g. a casted bool) ...
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        // already resolved earlier in this request?
        if (isset($NORMALIZED_NAME_CACHE[$encoding])) {
            return $NORMALIZED_NAME_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // the name is already written exactly like a known encoding
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $NORMALIZED_NAME_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $cache_key = $encoding;
        $encoding_upper = \strtoupper($encoding);

        // the alias table is keyed by the name reduced to letters and digits
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding_upper);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // known alias -> canonical name, otherwise keep the upper-cased input
        $normalized = $aliases[$alias_key] ?? $encoding_upper;

        // remember the result under the name exactly as it was passed in
        $NORMALIZED_NAME_CACHE[$cache_key] = $normalized;

        return $normalized;
    }
4945
4946
    /**
     * Replace every line ending ("\r\n", "\r" and "\n") by the given replacer.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * The string is converted in one single pass: a "\r\n" pair always counts as ONE line ending
     * and text that was just inserted is never scanned again, so a replacer that itself contains
     * "\r" or "\n" (e.g. "\r\n") is safe to use.
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. If an array is given, its 1st / 2nd / 3rd value is used for
     *                                  "\r\n" / "\r" / "\n"; missing values are treated as an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        $line_endings = ["\r\n", "\r", "\n"];

        if (\is_array($replacer)) {
            // pair the line endings with the replacements by position
            $replacements = \array_values($replacer);
            $map = [];
            foreach ($line_endings as $index => $line_ending) {
                $map[$line_ending] = (string) ($replacements[$index] ?? '');
            }
        } else {
            $map = \array_fill_keys($line_endings, (string) $replacer);
        }

        // strtr() prefers the longest key ("\r\n" before "\r") and does not re-process
        // replaced text, a chained str_replace() would corrupt e.g. a "\r\n" replacer
        return \strtr($str, $map);
    }
4962
4963
    /**
     * Normalize some MS Word special characters, the work is done by "ASCII::normalize_msword()".
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4979
4980
    /**
     * Normalize the whitespace, the work is done by "ASCII::normalize_whitespace()".
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        // all three arguments are forwarded unchanged
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
5006
5007
    /**
     * Calculates the Unicode code point of the given character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * Results are cached per character + encoding. The code point is taken from the "ord" data
     * table, then from "IntlChar::ord()" (if supported) and as a last resort it is computed
     * from the first (up to four) bytes of the input.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 if no byte could be read from the input</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CODE_POINT_CACHE = [];

        $char = (string) $chr;

        // "UTF-8" and "CP850" are used as they are, other names are normalized first
        $needs_normalizing = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $char . '_' . $encoding;
        if (isset($CODE_POINT_CACHE[$cache_key])) {
            return $CODE_POINT_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        // lazy-load the lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$char])) {
            return $CODE_POINT_CACHE[$cache_key] = self::$ORD[$char];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($char);
            if ($intl_code) {
                return $CODE_POINT_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (max. four) byte values
        $bytes = \unpack('C*', (string) \substr($char, 0, 4));
        $lead_byte = $bytes ? $bytes[1] : 0;

        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // lead byte >= 0xF0 and four bytes available
            $code = (($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // lead byte >= 0xE0 and three bytes available
            $code = (($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // lead byte >= 0xC0 and two bytes available
            $code = (($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // anything else: the value of the first byte (or 0)
            $code = $lead_byte;
        }

        return $CODE_POINT_CACHE[$cache_key] = (int) $code;
    }
5101
5102
    /**
     * Parses a query string into an array that is written to the second (reference) parameter.
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Pass the string through "UTF8::clean()" before parsing.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string or if $result is empty afterwards.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $query = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($query, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($query, $result);

        return $parsed !== false && $result !== [];
    }
5143
5144
    /**
     * Checks if the "u" pattern modifier, that enables Unicode support in PCRE, is available.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // an empty pattern with the "u" modifier, errors are silenced and only the result is used
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
5160
5161
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @throws \InvalidArgumentException <p>If $step is not numeric or not greater than zero.</p>
     * @throws \RuntimeException         <p>If $use_ctype is true but "ext-ctype" is not available.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An empty array is returned if one of the boundaries is empty or resolves to 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // ctype_*() must only see strings: integers are interpreted as ASCII codes
        // (and passing them is deprecated as of PHP 8.1), so cast both boundaries once
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        /** @noinspection PhpComposerExtensionStubsInspection */
        $is_digit = $use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string);

        // hexadecimal is only considered if both values are not plain decimal numbers
        /** @noinspection PhpComposerExtensionStubsInspection */
        $is_xdigit = !$is_digit && $use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string);

        if ($is_digit) {
            $start = (int) $var1;
        } elseif ($is_xdigit) {
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // treat the value as a character
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            // treat the value as a character
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        // convert every code point of the range back into a character
        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5255
5256
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Repeat the decoding until the string does not change anymore.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to decode
        $may_be_encoded = \strpos($str, '&') !== false
                          ||
                          \strpos($str, '%') !== false
                          ||
                          \strpos($str, '+') !== false
                          ||
                          \strpos($str, '\u') !== false;

        if (!$may_be_encoded) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding round is always done, further rounds only in "multi decode" mode
        // and only as long as the last round still changed something
        do {
            $before_round = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entities_decoded));
        } while ($multi_decode && $before_round !== $str);

        return $str;
    }
5332
5333
    /**
     * Replaces all matches of the regular expression $pattern in $str by $replacement.
     *
     * The pattern is always executed with the "u" modifier, followed by the given $options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used, "msr" is handled like "ms".</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // "msr" is reduced to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $regex_delimiter = $delimiter ?: '/';

        $regex = $regex_delimiter . $pattern . $regex_delimiter . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5368
5369
    /**
     * Deprecated camelCase alias, forwards the string to "UTF8::remove_bom()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5385
5386
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * Every entry of the BOM table is compared with the start of the string. The loop goes on
     * after a match, so a later table entry found at the new start is cut off as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);

        foreach (self::$BOM as $bom => $bom_length) {
            // the string does not start with this BOM -> try the next one
            if (\strncmp($str, $bom, $bom_length) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $bom_length, $remaining_length);
            if ($stripped === false) {
                return '';
            }

            $remaining_length -= (int) $bom_length;
            $str = (string) $stripped;
        }

        return $str;
    }
5421
5422
    /**
     * Collapses directly repeated occurrences of a string in another string into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String (or strings) to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) ? [$what] : $what;

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_array($needles)) {
            return $str;
        }

        foreach ($needles as $needle) {
            // "$1" is the captured needle itself. Using the raw needle as replacement would
            // let preg_replace() interpret "$n" / "\n" sequences inside of it as back-references.
            $str = (string) \preg_replace('/(' . \preg_quote($needle, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5452
5453
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                               Default: '' (strip all tags)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5470
5471
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * EXAMPLE: <code>UTF8::remove_html_breaks("foo\r\nbar<br />baz", ' '); // 'foo bar baz'</code>
     *
     * A "\r\n" pair counts as one break, so it is replaced by $replacement only once.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // NOTE: "\r\n" has to be the first alternative, otherwise "\r" and "\n" would match separately.
        // Modifiers: i = case-insensitive, s = dot matches newlines, U = ungreedy ("<br ...>" stops at the first ">").
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
5486
5487
    /**
     * Remove invisible characters from a string, the work is done by "ASCII::remove_invisible_characters()".
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        // all three arguments are forwarded unchanged
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $cleaned;
    }
5521
5522
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_left('0123', '0'); // '123'</code>
     *
     * The prefix test compares bytes, the cut itself is done in characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare with '' explicitly: a truthiness check would wrongly skip the prefix "0"
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5563
5564
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_right('120', '0'); // '12'</code>
     *
     * The suffix test compares bytes, the cut itself is done in characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare with '' explicitly: a truthiness check would wrongly skip the suffix "0"
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5602
5603
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * The case-sensitive variant uses "str_replace()", the case-insensitive one "UTF8::str_ireplace()".
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5628
5629
    /**
     * Replaces every occurrence of each $search element in $str with $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // case-insensitive matching is delegated to the UTF-8 aware helper
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5654
5655
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // mbstring expects the keyword "none" instead of an empty substitute
            $substitute = $replacement_char === '' ? 'none' : $replacement_char;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $previous_substitute = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
            @\mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the substitute character that was active before this call
            \mb_substitute_character($previous_substitute);
        }

        $diamonds = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $replacements = [
            $replacement_char,
            $replacement_char,
        ];

        return \str_replace($diamonds, $replacements, $str);
    }
5713
5714
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. With null, whitespace is stripped;
     *                           with an empty string, nothing is stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would build the invalid pattern "[]+$"; the
        // failed regex call would then be cast to '' and wipe the whole string.
        if ($chars === '') {
            return $str;
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            $pattern = '[\\s]+$';
        } else {
            if ($has_mbstring) {
                // "mb_ereg" patterns have no delimiter, so none needs to be quoted
                /** @noinspection PregQuoteUsageInspection */
                $quoted_chars = \preg_quote($chars);
            } else {
                $quoted_chars = \preg_quote($chars, '/');
            }

            $pattern = "[{$quoted_chars}]+$";
        }

        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5755
5756
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the
     * internal support table and always returns it.
     *
     * @param bool $useEcho <p>If true, the generated HTML is also echoed.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';

        // iterate by value: nothing is modified here, so a by-reference loop
        // would only leave a dangling reference into the static array
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5783
5784
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched when the caller asked for it
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5817
5818
    /**
     * Replaces every run of $tab_length spaces in the string with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5838
5839
    /**
     * alias for "UTF8::str_split()"
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5861
5862
    /**
     * alias for "UTF8::str_starts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5879
5880
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_*" functions are only used when no language-specific
        // or length-preserving handling was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-cases one matched chunk; shared by both replace-callbacks below.
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        // remove separator runs and upper-case the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        // upper-case the character that directly follows a run of digits
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5973
5974
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        // first pass handles space-separated parts, second pass dash-separated parts
        $capitalized_words = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalized_words, '-');
    }
5995
5996
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
6021
6022
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle always
     * results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Compare against '' instead of relying on truthiness, otherwise
            // the needle "0" would be rejected although it is a valid substring.
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            // run exactly one search per needle, matching the requested mode
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6061
6062
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles list
     * always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains any of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Compare against '' instead of relying on truthiness, otherwise
            // the needle "0" would be skipped although it is a valid substring.
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6106
6107
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
6123
6124
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $str = \trim($str);

        // step 1: put a dash in front of every uppercase letter inside a word
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $str);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $str);
        }

        // step 2: lowercase, via the plain "mb_*" function when no special handling was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: collapse runs of dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6177
6178
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true)) {
            $utf32_names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
            $utf32_flag = self::is_utf32($str, false);
            if (\is_int($utf32_flag) && isset($utf32_names[$utf32_flag])) {
                return $utf32_names[$utf32_flag];
            }

            $utf16_names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];
            $utf16_flag = self::is_utf16($str, false);
            if (\is_int($utf16_flag) && isset($utf16_names[$utf16_flag])) {
                return $utf16_names[$utf16_flag];
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $detection_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $detection_order, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);

            // a lossless round trip means the bytes are valid in that encoding
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
6308
6309
    /**
     * alias for "UTF8::str_ends_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6326
6327
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        $needle_bytes = \strlen($needle);

        // byte-wise comparison of the trailing bytes
        return $haystack !== ''
               &&
               \substr($haystack, -$needle_bytes) === $needle;
    }
6354
6355
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // iterate by value: the candidates are only read, so a by-reference
        // loop would just leave a dangling reference behind
        foreach ($substrings as $substring) {
            // byte-wise comparison of the trailing bytes
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
6382
6383
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $already_prefixed = $substring !== '' && \strpos($str, $substring) === 0;

        if (!$already_prefixed) {
            return $substring . $str;
        }

        return $str;
    }
6406
6407
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // byte-wise check of the trailing bytes, only attempted for non-empty input
        $already_suffixed = $str !== ''
                            &&
                            $substring !== ''
                            &&
                            \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
6431
6432
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // '_id' is removed first, afterwards every remaining underscore becomes a space
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $humanized = \str_replace($search, $replace, $str);

        return self::ucfirst(\trim($humanized));
    }
6458
6459
    /**
     * alias for "UTF8::str_istarts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
6476
6477
    /**
     * Deprecated alias that forwards to "UTF8::str_iends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
6494
6495
    /**
     * Case-insensitive check whether $haystack ends with $needle.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // The tail is cut by the byte length of $needle and then compared
        // case-insensitively against $needle.
        return $needle === ''
               || (
                   $haystack !== ''
                   && self::strcasecmp(\substr($haystack, -\strlen($needle)), $needle) === 0
               );
    }
6522
6523
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with one of the $substrings;
     *              always false for an empty list.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified, so a reference would only
        // force a copy of the array and leave a dangling reference behind.
        // An empty list falls straight through to "false".
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6550
6551
    /**
     * Case-insensitive lookup of the first occurrence of $needle in $str.
     * Deprecated thin wrapper that forwards all arguments to "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
6582
6583
    /**
     * Case-insensitive lookup of the last occurrence of $needle in $str.
     * Deprecated thin wrapper that forwards all arguments to "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
6615
6616
    /**
     * Looks up the first occurrence of $needle in $str.
     * Deprecated thin wrapper that forwards all arguments to "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
6647
6648
    /**
     * Looks up the last occurrence of $needle in $str.
     * Deprecated thin wrapper that forwards all arguments to "UTF8::strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
6680
6681
    /**
     * Inserts $substring into $str in front of the character at $index.
     *
     * When $index is greater than the character length of $str, the string
     * is returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // The literal 'UTF-8' takes the direct mb_* route; everything else is
        // normalized first and handled by the class' own helpers.
        $use_mb = $encoding === 'UTF-8';

        if ($use_mb) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        if ($index > $length) {
            return $str;
        }

        if ($use_mb) {
            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);
        } else {
            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);
        }

        return $head . $substring . $tail;
    }
6722
6723
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * The replacement is always inserted literally: "$" and "\" in
     * $replacement are escaped before they reach preg_replace(), so values
     * such as '$1' or '\0' are not expanded as regex backreferences.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     An empty search string never matches.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @psalm-param TStrIReplaceSubject $subject
     * @psalm-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Build one case-insensitive, unicode-aware pattern per search term.
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/' // deliberately unmatchable: an empty needle replaces nothing
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() interprets "$n", "${n}" and "\n" inside the replacement;
        // escape both trigger characters so the replacement stays literal.
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replacement)
            ? \array_map($escape, $replacement)
            : $escape($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6780
6781
    /**
     * Replaces $search at the beginning of the string with $replacement,
     * ignoring case (including non-ASCII letters in valid UTF-8 input).
     *
     * An empty $search never matches a prefix; in that case $replacement is
     * appended to $str (existing behavior, kept for compatibility).
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // Also covers an empty $str: '' . $replacement === $replacement.
            return $str . $replacement;
        }

        // Anchored, unicode-aware, case-insensitive prefix match. The matched
        // text may differ in byte length from $search (e.g. 'Ä' vs 'ä' is the
        // same length, but not every case pair is), so cut by the match itself.
        $matched = \preg_match('/\A' . \preg_quote($search, '/') . '/ui', $str, $hit);
        if ($matched === 1) {
            return $replacement . \substr($str, \strlen($hit[0]));
        }

        if ($matched === false) {
            // PCRE rejected the input (e.g. invalid UTF-8): fall back to the
            // byte-wise, ASCII-only comparison.
            $search_length = \strlen($search);
            if (\strncasecmp($str, $search, $search_length) === 0) {
                return $replacement . \substr($str, $search_length);
            }
        }

        return $str;
    }
6816
6817
    /**
     * Replaces $search at the end of the string with $replacement,
     * ignoring case (including non-ASCII letters in valid UTF-8 input).
     *
     * An empty $search never matches a suffix; in that case $replacement is
     * simply appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // Also covers an empty $str: '' . $replacement === $replacement.
            return $str . $replacement;
        }

        // Unicode-aware, case-insensitive match anchored at the very end (\z).
        // The captured byte offset tells us where the suffix starts, so no
        // offset arithmetic is needed and a $search longer than $str is harmless.
        $matched = \preg_match(
            '/' . \preg_quote($search, '/') . '\z/ui',
            $str,
            $hit,
            \PREG_OFFSET_CAPTURE
        );
        if ($matched === 1) {
            return \substr($str, 0, $hit[0][1]) . $replacement;
        }

        if ($matched === false) {
            // PCRE rejected the input (e.g. invalid UTF-8): fall back to a
            // byte-wise, ASCII-only comparison of the tail.
            $search_length = \strlen($search);
            if (
                $search_length <= \strlen($str)
                &&
                \strcasecmp(\substr($str, -$search_length), $search) === 0
            ) {
                return \substr($str, 0, -$search_length) . $replacement;
            }
        }

        return $str;
    }
6851
6852
    /**
     * Case-insensitive check whether $haystack starts with $needle.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        return $needle === ''
               || ($haystack !== '' && self::stripos($haystack, $needle) === 0);
    }
6879
6880
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings;
     *              always false for an empty $str or an empty list.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // Iterate by value: nothing is modified, so a reference would only
        // force a copy of the array and leave a dangling reference behind.
        // An empty list falls straight through to "false".
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6911
6912
    /**
     * Gets the substring after the first (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the separator, or an empty string when
     *                $str / $separator is empty or the separator is not found.</p>
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // Search in the same encoding that is used for slicing below, so the
        // character offset and the substring refer to the same units.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6951
6952
    /**
     * Gets the substring after the last (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the separator, or an empty string when
     *                $str / $separator is empty or the separator is not found.</p>
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // Search in the same encoding that is used for slicing below, so the
        // character offset and the substring refer to the same units.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6991
6992
    /**
     * Gets the substring before the first (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the separator, or an empty string when
     *                $str / $separator is empty or the separator is not found.</p>
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr($str, 0, $offset);
        }

        // Search in the same encoding that is used for slicing below, so the
        // character offset and the substring refer to the same units.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7023
7024
    /**
     * Gets the substring before the last (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the separator, or an empty string when
     *                $str / $separator is empty or the separator is not found.</p>
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $use_mb = $encoding === 'UTF-8';

        // Locate the separator with the same toolset that does the slicing.
        $position = $use_mb
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($use_mb) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
7060
7061
    /**
     * Gets the part of $str starting at (or, with "$before_needle", ending before)
     * the first case-insensitive occurrence of "$needle", as returned by "UTF8::stristr()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The substring, or an empty string when $str / $needle is
     *                empty or the needle is not found.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        // "false" (not found) is normalized to an empty string.
        return $found === false ? '' : $found;
    }
7099
7100
    /**
     * Gets the part of $str starting at (or, with "$before_needle", ending before)
     * the last case-insensitive occurrence of "$needle", as returned by "UTF8::strrichr()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The substring, or an empty string when $str / $needle is
     *                empty or the needle is not found.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        // "false" (not found) is normalized to an empty string.
        return $found === false ? '' : $found;
    }
7138
7139
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trailing characters, or an empty string when $str is
     *                empty or $n is not positive.</p>
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            return (string) self::substr(
                $str,
                -$n,
                null,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        // A negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
7167
7168
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer
     * strings are cut so that the kept part plus $str_add_on is $length
     * characters long. If $str_add_on alone is at least $length characters
     * long, nothing of $str is kept and only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp at 0: a negative length would make mb_substr() cut from
            // the END of the string and return far more than $length characters.
            $keep = \max(0, $length - (int) self::strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Same clamp as above for the generic encoding path.
        $keep = \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7207
7208
    /**
     * Limit the number of characters in a string, cutting at a word boundary
     * (a space) where possible.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls exactly on a space: cut right in front of it.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);

            // Drop the last (possibly partial) word.
            $whole_words = \implode(' ', \explode(' ', $truncated, -1));

            if ($whole_words === '') {
                // No space inside the limit: hard cut one character short.
                return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
            }

            return $whole_words . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right in front of it.
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return $str_add_on;
        }

        // Drop the last (possibly partial) word.
        $whole_words = \implode(' ', \explode(' ', $truncated, -1));

        if ($whole_words === '') {
            // No space inside the limit: hard cut one character short.
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
7276
7277
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * The strings are compared character by character from the start; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        // Decide the code path before the encoding name gets normalized.
        $use_mb = $encoding === 'UTF-8';

        if ($use_mb) {
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        $prefix = '';
        $position = 0;

        while ($position < $limit) {
            $left = $use_mb
                ? \mb_substr($str1, $position, 1)
                : self::substr($str1, $position, 1, $encoding);
            if ($left === false) {
                break;
            }

            $right = $use_mb
                ? \mb_substr($str2, $position, 1)
                : self::substr($str2, $position, 1, $encoding);
            if ($left !== $right) {
                break;
            }

            $prefix .= $left;
            ++$position;
        }

        return $prefix;
    }
7340
7341
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * @param string $str1     <p>First string for comparison.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Decided after normalization, so an alias that normalizes to "UTF-8" uses the native functions.
        $is_utf8 = $encoding === 'UTF-8';

        // Extract the characters of $str2 once instead of once per cell of the table.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based index in $str1 at which the best match ends

        // Only the previous row of the table is ever read, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $is_utf8
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // strict ">" keeps the first match in case of ties
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7431
7432
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * The strings are compared character by character from their ends until
     * the first mismatch.
     *
     * @param string $str1     <p>First string for comparison.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // walk backwards: offset -1 is the last character, -2 the one before, ...
            for ($offset = -1; $offset >= -$limit; --$offset) {
                $left = \mb_substr($str1, $offset, 1);

                if ($left === false || $left !== \mb_substr($str2, $offset, 1)) {
                    break;
                }

                $suffix = $left . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($offset = -1; $offset >= -$limit; --$offset) {
            $left = self::substr($str1, $offset, 1, $encoding);

            if ($left === false || $left !== self::substr($str2, $offset, 1, $encoding)) {
                break;
            }

            $suffix = $left . $suffix;
        }

        return $suffix;
    }
7498
7499
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is embedded between "/" delimiters and evaluated with the
     * "u" modifier, so it must not contain its own delimiters or modifiers.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
7514
7515
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // a negative offset -n addresses the n-th character from the end
        if ($offset < 0) {
            return \abs($offset) <= $char_count;
        }

        return $offset < $char_count;
    }
7540
7541
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must use the same encoding as the lookup below,
        // otherwise the length and the index refer to different character counts.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7573
7574
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * The input is returned unchanged if $pad_length is 0, if $pad_string is
     * empty or if the input already has at least $pad_length characters.
     * With STR_PAD_BOTH and an odd number of padding characters, the extra
     * character goes to the right side.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the textual aliases into the native "STR_PAD_*" constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // The native "mb_*" functions are only used when "UTF-8" was requested literally.
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        $diff = $pad_length - $str_length;
        if ($diff <= 0) {
            // already long enough -> nothing to pad
            return $str;
        }

        $ps_length = $is_utf8
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);
        if ($ps_length < 1) {
            // defensive: never divide by zero if the length could not be determined
            $ps_length = 1;
        }

        /**
         * Builds exactly $length characters of padding by repeating the pad
         * string just often enough and cutting off the surplus.
         *
         * @param int $length
         *
         * @return string
         */
        $build_padding = static function (int $length) use ($pad_string, $ps_length, $is_utf8, $encoding): string {
            if ($length <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($length / $ps_length));

            if ($is_utf8) {
                return (string) \mb_substr($repeated, 0, $length);
            }

            return (string) self::substr($repeated, 0, $length, $encoding);
        };

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                return $build_padding($diff) . $str;

            case \STR_PAD_BOTH:
                return $build_padding((int) \floor($diff / 2))
                       . $str
                       . $build_padding((int) \ceil($diff / 2));

            case \STR_PAD_RIGHT:
            default:
                // unknown integer values are treated like STR_PAD_RIGHT
                return $str . $build_padding($diff);
        }
    }
7741
7742
    /**
     * Pads both sides of the string until it has the given length.
     * Shortcut for "UTF8::str_pad()" with STR_PAD_BOTH as $pad_type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7770
7771
    /**
     * Pads the beginning of the string until it has the given length.
     * Shortcut for "UTF8::str_pad()" with STR_PAD_LEFT as $pad_type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7799
7800
    /**
     * Pads the end of the string until it has the given length.
     * Shortcut for "UTF8::str_pad()" with STR_PAD_RIGHT as $pad_type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7828
7829
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7858
7859
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @psalm-param TStrReplaceSubject $subject
     * @psalm-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7918
7919
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is done byte-wise. If $search is empty, $replacement is
     * appended to the end of $str. If $str does not start with $search, $str
     * is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle never "matches": the replacement is simply appended.
        // (This also covers an empty $str, where the result is just $replacement.)
        if ($search === '') {
            return $str . $replacement;
        }

        $needle_bytes = \strlen($search);
        $has_prefix = \strncmp($str, $search, $needle_bytes) === 0;

        return $has_prefix
            ? $replacement . \substr($str, $needle_bytes)
            : $str;
    }
7957
7958
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is done byte-wise. If $search is empty, $replacement is
     * appended to the end of $str. If $str does not end with $search, $str
     * is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle never "matches": the replacement is simply appended.
        // (This also covers an empty $str, where the result is just $replacement.)
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // Check the length first: a needle longer than the haystack can never
        // be a suffix, and an out-of-range negative offset would make the
        // native string functions fail (ValueError since PHP 8).
        if (
            $search_length <= \strlen($str)
            &&
            \substr_compare($str, $search, -$search_length) === 0
        ) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
7995
7996
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * If "$search" does not occur in "$subject", the subject is returned unchanged.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, $search_length);
    }
8030
8031
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * If "$search" does not occur in "$subject", the subject is returned unchanged.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, $search_length);
    }
8064
8065
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            // nothing to shuffle; also avoids building the bogus index list "range(0, -1)"
            return '';
        }

        // The native "mb_*" functions are only used when "UTF-8" was requested literally.
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // shuffle the character positions, then collect the characters in that order
        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // init
        $shuffled_str = '';

        foreach ($indexes as $index) {
            $tmp_sub_str = $is_utf8
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
8114
8115
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string. A negative $start is counted from the end of the string
     * as well.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The native "mb_*" functions are only used when "UTF-8" was requested literally.
        $is_utf8 = $encoding === 'UTF-8';

        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // an end index at or before a non-negative start index selects nothing
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        // The length is needed for an open end and whenever an index is relative to the end.
        $str_length = 0;
        if ($end === null || $end < 0 || $start < 0) {
            $str_length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        }

        if ($end === null) {
            $length = $str_length;
        } else {
            if ($start < 0) {
                // Translate the negative start into an absolute index first,
                // otherwise "$end - $start" over-counts and the slice runs past $end.
                $start = (int) \max(0, $str_length + $start);

                if ($end >= 0 && $end <= $start) {
                    return '';
                }
            }

            if ($end < 0) {
                $length = $str_length + $end - $start;
            } else {
                $length = $end - $start;
            }
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
8166
8167
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become underscores, every uppercase letter is
     * lower-cased and prefixed with an underscore, ASCII digits are wrapped
     * in underscores, and runs of underscores are collapsed afterwards.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers and uppercase letters; note that "|" inside of a character
            // class would be a literal pipe and not an alternation
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // only ASCII digits survive the int round trip unchanged
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8236
8237
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping the array twice drops every repeated code point
            $code_points = \array_flip(\array_flip($code_points));
        }

        // key-preserving sort, ascending unless "$desc" was requested
        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8267
8268
    /**
     * Convert each string of an array to an array of Unicode characters.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input, the keys of the input are kept.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // build a fresh array instead of iterating by reference, so that
        // no dangling reference ends up in the returned value
        $result = [];
        foreach ($input as $key => $value) {
            $result[$key] = self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        /** @var string[][] $result */
        return $result;
    }
8306
8307
    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * The split is done via "mb_str_split()" when available, otherwise via
     * "mb_substr()" / PCRE and as a last resort via a byte-wise UTF-8 decoder.
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input,
     *                  an empty array for an empty input or a "$length" below 1.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        // init
        $input = (string) $input;

        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if (
            $try_to_use_mb_functions
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $return = \mb_str_split($input, $length);
                if ($return !== false) {
                    // "mb_str_split()" already handled "$length"
                    return $return;
                }
            }

            $i_max = \mb_strlen($input);
            if ($i_max <= 127) {
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($input, $i, 1);
                }
            } else {
                $return_array = [];
                \preg_match_all('/./us', $input, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $input, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: decode the UTF-8 sequences byte by byte, a lead byte
            // without the expected continuation bytes is dropped

            $ret = [];
            $len = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($input[$i] & "\x80") === "\x00") {
                    // 1 byte: 0xxxxxxx
                    $ret[] = $input[$i];
                } elseif (
                    isset($input[$i + 1])
                    &&
                    ($input[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    if (($input[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $input[$i] . $input[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($input[$i + 2])
                    &&
                    ($input[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($input[$i + 3])
                    &&
                    ($input[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // INFO: the chunk is taken by value, "array_map()" does not
                //       pass its arguments by reference
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
8467
8468
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * NOTE(review): with "mbstring" the pattern is handed to "mb_split()" as a
     * regex, while the fallback quotes it via "preg_quote()" and therefore
     * treats it as a literal string - confirm which behaviour is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings, an empty array if the split failed or "$limit" is 0.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // same result as for a failed "preg_split()" in the fallback below
            if ($parts === false) {
                return [];
            }

            if ($limit > 0) {
                // truncate the result: keep only the first "$limit" parts
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        if ($limit > 0) {
            // request one part more, so that the unsplit rest can be dropped afterwards
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
8532
8533
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is done byte-wise and is case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // an empty needle is the prefix of every string
        if ($needle === '') {
            return true;
        }

        // an empty haystack can never start with a non-empty needle
        return $haystack !== ''
               && \substr($haystack, 0, \strlen($needle)) === $needle;
    }
8560
8561
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring,
     *              always false for an empty $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // the list is only read, so iterate by value
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8592
8593
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part after the separator, an empty string if the
     *                separator was not found or if one of the inputs is empty.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the wrapper (like the other "str_substr_*_separator" methods do)
        // instead of calling "mb_substr()" with the raw "$encoding" value
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8634
8635
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part after the separator, an empty string if the
     *                separator was not found or if one of the inputs is empty.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the last separator (native "mb_*" call for UTF-8)
        $separator_pos = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, $separator_pos + (int) \mb_strlen($separator));
        }

        $start = $separator_pos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
8679
8680
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part before the separator, an empty string if the
     *                separator was not found or if one of the inputs is empty.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the first separator (native "mb_*" call for UTF-8)
        $separator_pos = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $separator_pos);
        }

        return (string) self::substr($str, 0, $separator_pos, $encoding);
    }
8725
8726
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part before the separator, an empty string if the
     *                separator was not found or if one of the inputs is empty.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the last separator (native "mb_*" call for UTF-8)
        $separator_pos = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $separator_pos);
        }

        // the encoding is normalized only after the lookup, right before the cut
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $separator_pos, $normalized_encoding);
    }
8770
8771
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The found part, an empty string if the needle was not
     *                found or if one of the inputs is empty.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is the default of the third "mb_strstr()" argument,
        // so the flag can always be passed through
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8817
8818
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The found part, an empty string if the needle was not
     *                found or if one of the inputs is empty.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is the default of the third "mb_strrchr()" argument,
        // so the flag can always be passed through
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8864
8865
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        // prefix + string + suffix, where prefix and suffix are the same substring
        return \implode('', [$substring, $str, $substring]);
    }
8880
8881
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>An string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used without language special cases
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // INFO: explicit comparison, a truthiness check would ignore the separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            // a word === everything except whitespace and the given separator chars
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are kept as they are
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8978
8979
    /**
     * Convert a string into a obfuscate string.
     *
     * Randomly picked characters are replaced by "$obfuscateChar" until the
     * requested share of the string was processed. Characters listed in
     * "$keepChars" count as processed, but they are never replaced.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the characters to process, values above 1.0 are
     *                                handled like 1.0.</p>
     * @param string   $obfuscateChar
     * @param string[] $keepChars
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscate string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // protect already existing "$obfuscateChar" chars via a placeholder,
        // they are restored at the end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);
        // INFO: never ask for more chars than the string has, otherwise the
        //       loop below could not terminate for a "$percent" above 1.0
        $charsMaxChange = \min($charsMax, \round($charsMax * $percent));
        $charsCounter = 0;
        $charKeyDone = [];

        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // random pick: skip this char for now in about half of the cases
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
9045
9046
    /**
     * Returns a trimmed string in human-friendly title case.
     *
     * A built-in list of "small words" (a, an, and, of, the, ...) is kept in
     * lower-case unless the word opens or closes the title or a sub-phrase, or is
     * part of a two-part hyphenated compound. The entries of $ignore are appended
     * to that list; they are inserted into the regular expressions as-is (they are
     * not escaped).
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments (not plain words) describing the default small words
        $default_small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        $small_words = $ignore === []
            ? $default_small_words
            : \array_merge($default_small_words, $ignore);

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $title = \trim($str);

        // presumably "no lower-case char at all" means ALL-CAPS input, which is lower-cased first
        if (!self::has_lowercase($title)) {
            $title = self::strtolower($title, $encoding);
        }

        /**
         * Upper-case the first char of capture group 1.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return self::ucfirst($matches[1], $encoding);
        };

        /**
         * Keep capture group 1 and upper-case the first char of capture group 2.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $keep_one_ucfirst_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . self::ucfirst($matches[2], $encoding);
        };

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $title = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = self::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // groups 1 and 6 are the leading / trailing underscores, kept as they are
                return $matches[1] . $word . $matches[6];
            },
            $title
        );

        // Exceptions for small words: capitalize at start of title...
        $title = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $keep_one_ucfirst_two,
            $title
        );

        // ...and end of title
        $title = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $title
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $title = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $title
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in the pattern below tests for an ellipsis character ("…"),
        // although its inline comment talks about a hyphen - confirm which one is intended.
        $title = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $keep_one_ucfirst_two,
            $title
        );

        return $title;
    }
9237
9238
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big base-2 number, without
     * leading zero bits ("0" for an empty string or a string of NUL bytes only).
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Map every hex digit to its 4 bits instead of using base_convert():
        // base_convert() works on native numbers and silently loses precision
        // as soon as the input is longer than a few bytes.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            (string) $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // keep the output format of the former base_convert() call: no leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9261
9262
    /**
     * Split a string into lines.
     *
     * The string is split at every run of one or two "\r" / "\n" characters.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split": no entry at all, or one empty line
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
9302
9303
    /**
     * Convert a string into an array of words.
     *
     * The result alternates between "non-word" parts (even indexes) and words
     * (odd indexes), because the string is split with the words captured as
     * delimiters.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // regex char-class for "word chars": letters plus the additional chars
        $word_class = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$word_class}+(?:[\p{Pd}’']{$word_class}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothing;
        }

        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every entry is a string (keys are preserved)
        return \array_map(
            /**
             * @param mixed $item
             *
             * @return string
             */
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
9354
9355
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are passed on unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9376
9377
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If nothing of the string fits (the remaining length is zero or negative),
     * only $substring is returned, as "UTF8::str_truncate_safe()" does.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // reserve room for the appended substring
                $length -= (int) \mb_strlen($substring);
            }

            // a negative length would make mb_substr() count from the END of
            // the string and return (nearly) the whole input
            if ($length < 0) {
                $length = 0;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // reserve room for the appended substring
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same guard as in the UTF-8 branch: never pass a negative length on
        if ($length < 0) {
            $length = 0;
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9437
9438
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * If the string is empty, or no room is left for it, only $substring is returned.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // need to further trim the string so we can append the substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $truncated - needed for PhpStan (stubs error) */
            $truncated = \mb_substr($str, 0, $length);
            if ($truncated === false) {
                return '';
            }

            // first space at or after the last kept char; the cut is "clean"
            // only if that space is the very next char after the kept part
            $next_space = \mb_strpos($str, ' ', $length - 1);
            $word_was_split = $next_space !== $length;

            if ($word_was_split) {
                // find pos of the last occurrence of a space, get up to that
                $last_space = \mb_strrpos($truncated, ' ', 0);

                $cut_back = $last_space !== false
                            ||
                            (
                                $next_space !== false
                                &&
                                !$ignore_do_not_split_words_for_one_word
                            );

                if ($cut_back) {
                    $truncated = (string) \mb_substr($truncated, 0, (int) $last_space);
                }
            }

            return $truncated . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // same "was the last word split?" logic as in the UTF-8 branch
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        $word_was_split = $next_space !== $length;

        if ($word_was_split) {
            // find pos of the last occurrence of a space, get up to that
            $last_space = self::strrpos($truncated, ' ', 0, $encoding);

            $cut_back = $last_space !== false
                        ||
                        (
                            $next_space !== false
                            &&
                            !$ignore_do_not_split_words_for_one_word
                        );

            if ($cut_back) {
                $truncated = (string) self::substr($truncated, 0, (int) $last_space, $encoding);
            }
        }

        return $truncated . $substring;
    }
9544
9545
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * Implemented as "UTF8::str_delimit()" with "_" as the delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
9562
9563
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized first ("UTF8::str_camelize()"), then its first
     * character is upper-cased ("UTF8::ucfirst()").
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9590
9591
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are passed on unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
9622
9623
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string.</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the parts alternate: even index => text between words, odd index => word
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        if ($format === 1) {
            $words = [];
            $index = 1;
            while ($index < $parts_count) {
                $words[] = $parts[$index];
                $index += 2;
            }

            return $words;
        }

        if ($format === 2) {
            $words = [];
            // the first word starts after the leading "non-word" part
            $position = (int) self::strlen($parts[0]);
            $index = 1;
            while ($index < $parts_count) {
                $words[$position] = $parts[$index];
                // skip the word itself and the separator that follows it
                $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
                $index += 2;
            }

            return $words;
        }

        return (int) (($parts_count - 1) / 2);
    }
9680
9681
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * Both strings are passed through "UTF8::strtocasefold()" with the same
     * arguments, then compared with "UTF8::strcmp()".
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9723
9724
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are passed on unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9755
9756
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized (NFD) before they are compared, so that
     * different representations of the same character compare as equal. A string
     * that can't be normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        // Normalizer::normalize() returns false on error; passing that to
        // \strcmp() would throw a TypeError because of "strict_types".
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if ($normalized1 === false) {
            $normalized1 = $str1;
        }

        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if ($normalized2 === false) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
9782
9783
    /**
     * Find length of initial segment not matching mask.
     *
     * If $offset and / or $length are given, only that part of the string is
     * searched. 0 is returned if that part is empty or can't be extracted.
     *
     * @param string   $str
     * @param string   $char_list
     * @param int      $offset
     * @param int|null $length
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // empty mask: nothing can match, so the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $part = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $part = \mb_substr($str, $offset, $length);
            } else {
                $part = \mb_substr($str, $offset);
            }

            if ($part === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $part;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char of the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char of the mask in the string
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9846
9847
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are passed on unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9878
9879
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every value is cast to int, written as a numeric HTML entity ("&#...;"),
     * and the resulting entity string is decoded.
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @psalm-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single code point is handled like a list with one entry
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        $entities = \implode(
            '',
            \array_map(
                /**
                 * @param int|string $code_point
                 *
                 * @return string
                 */
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9912
9913
    /**
9914
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
9915
     *
9916
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
9917
     *
9918
     * @param string $str <p>The input string.</p>
9919
     *
9920
     * @psalm-pure
9921
     *
9922
     * @return bool
9923
     *              <p>
9924
     *              <strong>true</strong> if the string has BOM at the start,<br>
9925
     *              <strong>false</strong> otherwise
9926
     *              </p>
9927
     */
9928 6
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: nothing is written back, and a by-reference loop
        // over the shared static BOM table would needlessly turn its entries
        // into references.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // compare only the leading bytes that make up this BOM variant
            if (\strncmp($str, (string) $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9939
9940
    /**
9941
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
9942
     *
9943
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
9944
     *
9945
     * @see http://php.net/manual/en/function.strip-tags.php
9946
     *
9947
     * @param string      $str            <p>
9948
     *                                    The input string.
9949
     *                                    </p>
9950
     * @param string|null $allowable_tags [optional] <p>
9951
     *                                    You can use the optional second parameter to specify tags which should
9952
     *                                    not be stripped.
9953
     *                                    </p>
9954
     *                                    <p>
9955
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
9956
     *                                    can not be changed with allowable_tags.
9957
     *                                    </p>
9958
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9959
     *
9960
     * @psalm-pure
9961
     *
9962
     * @return string
9963
     *                <p>The stripped string.</p>
9964
     */
9965 4
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // optionally drop invalid UTF-8 before the tags are removed
        $input = $clean_utf8 ? self::clean($str) : $str;

        // only hand the allow-list to the native function when one was given
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9984
9985
    /**
9986
     * Strip all whitespace characters. This includes tabs and newline
9987
     * characters, as well as multibyte whitespace such as the thin space
9988
     * and ideographic space.
9989
     *
9990
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
9991
     *
9992
     * @param string $str
9993
     *
9994
     * @psalm-pure
9995
     *
9996
     * @return string
9997
     */
9998 36
    public static function strip_whitespace(string $str): string
    {
        // an empty string has nothing to strip, so the regex is skipped
        if ($str !== '') {
            $str = (string) \preg_replace('/[[:space:]]+/u', '', $str);
        }

        return $str;
    }
10006
10007
    /**
10008
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10009
     *
10010
     * INFO: use UTF8::stripos_in_byte() for the byte-length
10011
     *
10012
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
10013
     *
10014
     * @see http://php.net/manual/en/function.mb-stripos.php
10015
     *
10016
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10017
     * @param string $needle     <p>The string to find in haystack.</p>
10018
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
10019
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10020
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10021
     *
10022
     * @psalm-pure
10023
     *
10024
     * @return false|int
10025
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
10026
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
10027
     */
10028 25
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            && $offset >= 0 // grapheme_stripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
10088
10089
    /**
10090
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
10091
     *
10092
     * EXAMPLE: <code>
10093
     * $str = 'iñtërnâtiônàlizætiøn';
10094
     * $search = 'NÂT';
10095
     *
10096
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
10097
     * UTF8::stristr($str, $search, true)); // 'iñtër'
10098
     * </code>
10099
     *
10100
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
10101
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
10102
     * @param bool   $before_needle [optional] <p>
10103
     *                              If <b>TRUE</b>, it returns the part of the
10104
     *                              haystack before the first occurrence of the needle (excluding the needle).
10105
     *                              </p>
10106
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10107
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10108
     *
10109
     * @psalm-pure
10110
     *
10111
     * @return false|string
10112
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
10113
     */
10114 12
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare against '' explicitly:
        // a loose "!$needle" check would also treat the valid needle "0" as
        // empty and wrongly return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture (non-greedy) everything in front
        // of the first case-insensitive occurrence of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as were captured in front of the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
10186
10187
    /**
10188
     * Get the string length, not the byte-length!
10189
     *
10190
     * INFO: use UTF8::strwidth() for the char-length
10191
     *
10192
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
10193
     *
10194
     * @see http://php.net/manual/en/function.mb-strlen.php
10195
     *
10196
     * @param string $str        <p>The string being checked for length.</p>
10197
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10198
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10199
     *
10200
     * @psalm-pure
10201
     *
10202
     * @return false|int
10203
     *                   <p>
10204
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
10205
     *                   $encoding.
10206
     *                   (One multi-byte character counted as +1).
10207
     *                   <br>
10208
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
10209
     *                   chars.
10210
     *                   </p>
10211
     */
10212 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            && self::$SUPPORT['mbstring'] === false
            && self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: one regex match per UTF-8 character
        //

        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // zero matches on a non-empty string is reported as false
        return $char_count === 0 ? false : $char_count;
    }
10317
10318
    /**
10319
     * Get string length in byte.
10320
     *
10321
     * @param string $str
10322
     *
10323
     * @psalm-pure
10324
     *
10325
     * @return int
10326
     */
10327 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an
        // 8-bit encoding; otherwise the native function is used directly
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10340
10341
    /**
10342
     * Case-insensitive string comparisons using a "natural order" algorithm.
10343
     *
10344
     * INFO: natural order version of UTF8::strcasecmp()
10345
     *
10346
     * EXAMPLES: <code>
10347
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
10348
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10349
     *
10350
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10351
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10352
     * </code>
10353
     *
10354
     * @param string $str1     <p>The first string.</p>
10355
     * @param string $str2     <p>The second string.</p>
10356
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10357
     *
10358
     * @psalm-pure
10359
     *
10360
     * @return int
10361
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
10362
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
10363
     *             <strong>0</strong> if they are equal
10364
     */
10365 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands first, then compare them in natural order
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_str1, $folded_str2);
    }
10372
10373
    /**
10374
     * String comparisons using a "natural order" algorithm
10375
     *
10376
     * INFO: natural order version of UTF8::strcmp()
10377
     *
10378
     * EXAMPLES: <code>
10379
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
10380
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10381
     *
10382
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10383
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10384
     * </code>
10385
     *
10386
     * @see http://php.net/manual/en/function.strnatcmp.php
10387
     *
10388
     * @param string $str1 <p>The first string.</p>
10389
     * @param string $str2 <p>The second string.</p>
10390
     *
10391
     * @psalm-pure
10392
     *
10393
     * @return int
10394
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
10395
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
10396
     *             <strong>0</strong> if they are equal
10397
     */
10398 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // both sides go through strtonatfold() before the native comparison
            $folded_str1 = (string) self::strtonatfold($str1);
            $folded_str2 = (string) self::strtonatfold($str2);

            return \strnatcmp($folded_str1, $folded_str2);
        }

        // identical strings are equal without any further work
        return 0;
    }
10409
10410
    /**
10411
     * Case-insensitive string comparison of the first n characters.
10412
     *
10413
     * EXAMPLE: <code>
10414
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
10415
     * </code>
10416
     *
10417
     * @see http://php.net/manual/en/function.strncasecmp.php
10418
     *
10419
     * @param string $str1     <p>The first string.</p>
10420
     * @param string $str2     <p>The second string.</p>
10421
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
10422
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10423
     *
10424
     * @psalm-pure
10425
     *
10426
     * @return int
10427
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10428
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10429
     *             <strong>0</strong> if they are equal
10430
     */
10431 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both operands, then compare their first $len characters
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_str1, $folded_str2, $len);
    }
10443
10444
    /**
10445
     * String comparison of the first n characters.
10446
     *
10447
     * EXAMPLE: <code>
10448
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
10449
     * </code>
10450
     *
10451
     * @see http://php.net/manual/en/function.strncmp.php
10452
     *
10453
     * @param string $str1     <p>The first string.</p>
10454
     * @param string $str2     <p>The second string.</p>
10455
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
10456
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10457
     *
10458
     * @psalm-pure
10459
     *
10460
     * @return int
10461
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10462
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10463
     *             <strong>0</strong> if they are equal
10464
     */
10465 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut both operands down to their first $len characters
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head1, $head2);
    }
10485
10486
    /**
10487
     * Search a string for any of a set of characters.
10488
     *
10489
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
10490
     *
10491
     * @see http://php.net/manual/en/function.strpbrk.php
10492
     *
10493
     * @param string $haystack  <p>The string where char_list is looked for.</p>
10494
     * @param string $char_list <p>This parameter is case-sensitive.</p>
10495
     *
10496
     * @psalm-pure
10497
     *
10498
     * @return false|string
10499
     *                      <p>The string starting from the character found, or false if it is not found.</p>
10500
     */
10501 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // rxClass() supplies the regex fragment built from the character list
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $match)) {
            return false;
        }

        // return everything from the first matched character onwards
        $byte_position = (int) \strpos($haystack, $match[0]);

        return \substr($haystack, $byte_position);
    }
10513
10514
    /**
10515
     * Find the position of the first occurrence of a substring in a string.
10516
     *
10517
     * INFO: use UTF8::strpos_in_byte() for the byte-length
10518
     *
10519
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
10520
     *
10521
     * @see http://php.net/manual/en/function.mb-strpos.php
10522
     *
10523
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10524
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
10525
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
10526
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10527
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10528
     *
10529
     * @psalm-pure
10530
     *
10531
     * @return false|int
10532
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
10533
     *                   string.<br> If needle is not found it returns false.
10534
     */
10535 52
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle,
        // so a code point is converted into its character first
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Character index (in the full haystack) at which the search starts.
        // A negative offset is resolved against the haystack length, so the
        // result stays relative to the full haystack and not to the tail.
        // NOTE(review): assumes self::substr() treats a negative offset as
        // "count from the end", like mb_substr() - confirm.
        $search_start = $offset;
        if ($offset < 0) {
            $search_start = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack_tail = (string) $haystack_tmp;

        // byte position of the needle inside the remaining tail
        $pos = \strpos($haystack_tail, $needle);
        if ($pos === false) {
            return false;
        }

        // convert the byte position into a character count
        // (a match at byte 0 adds zero characters)
        return $search_start + (int) self::strlen(\substr($haystack_tail, 0, $pos), $encoding);
    }
10673
10674
    /**
10675
     * Find the position of the first occurrence of a substring in a string.
10676
     *
10677
     * @param string $haystack <p>
10678
     *                         The string being checked.
10679
     *                         </p>
10680
     * @param string $needle   <p>
10681
     *                         The string to search for in haystack.
10682
     *                         </p>
10683
     * @param int    $offset   [optional] <p>
10684
     *                         The search offset. If it is not specified, 0 is used.
10685
     *                         </p>
10686
     *
10687
     * @psalm-pure
10688
     *
10689
     * @return false|int
10690
     *                   <p>The numeric position of the first occurrence of needle in the
10691
     *                   haystack string. If needle is not found, it returns false.</p>
10692
     */
10693 2
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an
        // 8-bit encoding; otherwise the native function is used directly
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
10706
10707
    /**
10708
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10709
     *
10710
     * @param string $haystack <p>
10711
     *                         The string being checked.
10712
     *                         </p>
10713
     * @param string $needle   <p>
10714
     *                         The string to search for in haystack (case-insensitive).
10715
     *                         </p>
10716
     * @param int    $offset   [optional] <p>
10717
     *                         The search offset. If it is not specified, 0 is used.
10718
     *                         </p>
10719
     *
10720
     * @psalm-pure
10721
     *
10722
     * @return false|int
10723
     *                   <p>The numeric position of the first occurrence of needle in the
10724
     *                   haystack string. If needle is not found, it returns false.</p>
10725
     */
10726 2
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
10727
    {
10728 2
        if ($haystack === '' || $needle === '') {
10729
            return false;
10730
        }
10731
10732 2
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
10733
            // "mb_" is available if overload is used, so use it ...
10734
            return \mb_stripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
10735
        }
10736
10737 2
        return \stripos($haystack, $needle, $offset);
10738
    }
10739
10740
    /**
     * Find the last occurrence of a needle in a haystack and return the matching portion of the haystack.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * INFO: The "iconv" and the vanilla php fallback reduce the needle to its first character
     *       before searching.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.<br>
     *                              <b>true</b>: all of haystack from the beginning
     *                              to the last occurrence of needle.<br>
     *                              <b>false</b>: all of haystack from the last occurrence
     *                              of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // for UTF-8 the encoding argument is left out on purpose (as before)
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (!$before_needle && \in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php: both only search for the first character of the needle
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $first_char = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $first_char, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $first_char, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10870
10871
    /**
     * Reverses characters order in the string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * INFO: The string is passed through "UTF8::emoji_encode()" before and through
     *       "UTF8::emoji_decode()" after the reversal. For UTF-8 input the "intl"
     *       grapheme functions are used if available, otherwise the "mb_" functions.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ('' === $str) {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            for ($index = $length - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            $length = (int) \grapheme_strlen($str);
            for ($index = $length - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            $length = (int) \mb_strlen($str);
            for ($index = $length - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10928
10929
    /**
     * Find the last occurrence of a needle in a haystack (case-insensitive) and return
     * the matching portion of the haystack.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * INFO: The vanilla php fallback reduces the needle to its first character before searching.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.<br>
     *                              <b>true</b>: all of haystack from the beginning
     *                              to the last occurrence of needle.<br>
     *                              <b>false</b>: all of haystack from the last occurrence
     *                              of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // for UTF-8 the encoding argument is left out on purpose (as before)
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: only the first character of the needle is searched
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, (string) $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
11009
11010
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted via "UTF8::chr()".</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ('' === $haystack) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if (\is_int($needle) && $needle >= 0) {
            $needle = (string) self::chr($needle);
        } else {
            $needle = (string) $needle;
        }

        if ('' === $needle) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // for UTF-8 the encoding argument is left out on purpose (as before)
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = self::$SUPPORT['intl'] === true
                       &&
                       $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
                       &&
                       $offset >= 0; // grapheme_strripos() can't handle negative offset

        if ($intl_usable) {
            $intl_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
            // no match via intl: keep going with the remaining fallbacks
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings and search case-sensitive
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }
11127
11128
    /**
     * Finds the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Returns the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if haystack or needle is empty or needle is not found.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Nothing can be located in (or with) an empty string.
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        $use_mb_function = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_function
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
11162
11163
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ('' === $haystack) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if (\is_int($needle) && $needle >= 0) {
            $needle = (string) self::chr($needle);
        } else {
            $needle = (string) $needle;
        }

        if ('' === $needle) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // for UTF-8 the encoding argument is left out on purpose (as before)
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = self::$SUPPORT['intl'] === true
                       &&
                       $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
                       &&
                       $offset >= 0; // grapheme_strrpos() can't handle negative offset

        if ($intl_usable) {
            $intl_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
            // no match via intl: keep going with the remaining fallbacks
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the part that should be searched:
        // - positive offset: skip the leading characters (the offset is added back to the result)
        // - negative offset: drop the trailing characters (nothing needs to be added back)
        if ($offset !== 0) {
            if ($offset > 0) {
                $haystack_part = self::substr($haystack, $offset);
            } else {
                $haystack_part = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            $haystack = $haystack_part === false ? '' : (string) $haystack_part;
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // Convert the byte position into a character position by measuring
        // the length of everything in front of the match.
        /** @var false|string $before_match - needed for PhpStan (stubs error) */
        $before_match = \substr($haystack, 0, $byte_pos);
        if ($before_match === false) {
            return false;
        }

        return $offset + (int) self::strlen($before_match);
    }
11308
11309
    /**
     * Find the byte position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If haystack or needle is empty, or if needle is
     *                   not found, it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Nothing can be located in (or with) an empty string.
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        $use_mb_function = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_function
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
11343
11344
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * INFO: This implementation returns 0 (never false) if the string or the mask is empty
     *       or if the string does not start with a char from the mask.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the part of the string that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Only cut the string if the caller asked for a specific part of it.
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ('' === $str || '' === $mask) {
            return 0;
        }

        // Match the leading run of mask characters and measure it.
        $leading_run = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $leading_run)) {
            return 0;
        }

        return (int) self::strlen($leading_run[0], $encoding);
    }
11391
11392
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if haystack or needle is empty
     *                      or if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // for UTF-8 the encoding argument is left out on purpose (as before)
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8') {
            $intl_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intl_result !== false) {
                return $intl_result;
            }
            // no match via intl: keep going with the remaining fallbacks
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // Capture (non-greedy) everything in front of the first occurrence of the needle.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $before_match = $found[1];

        if ($before_needle) {
            return $before_match;
        }

        // Skip as many characters as there are in front of the match.
        return self::substr($haystack, (int) self::strlen($before_match));
    }
11513
11514
    /**
     * Finds first occurrence of a string within another (byte-based).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.<br>
     *                              <b>true</b>: all of haystack from the beginning
     *                              to the first occurrence of needle.<br>
     *                              <b>false</b>: all of haystack from the first occurrence
     *                              of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if haystack or needle is empty or if needle is not found.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // Nothing can be located in (or with) an empty string.
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        $use_mb_function = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_function
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
            : \strstr($haystack, $needle, $before_needle);
    }
11555
11556
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Fold to lowercase (default), otherwise fold to uppercase.
     *                                PS: uppercase is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The case-folded string, or an empty string for empty input.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        // replace the special case-folding chars first, then map the case of the rest
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // common case: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // other charsets / language specific rules are handled by the case methods of this class
        return $lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11612
11613
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased (cast to string
     *                                                   internally).</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr. Needs "intl"; without it a warning is triggered and
     *                                                   the generic conversion is used.</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // common case: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language given: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // language specific rules need the transliterators from "intl"
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of available transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language independent transliterator
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11694
11695
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased (cast to string
     *                                                   internally).</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr. Needs "intl"; without it a warning is triggered and
     *                                                   the generic conversion is used.</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // common case: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of available transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    /**
                     * @psalm-suppress ImpureFunctionCall - is is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language independent transliterator
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11776
11777
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>
     *                              The chars / sub-strings to replace. If "$to" is empty and this is an array,
     *                              it is used as a "search => replace" map; if "$to" is empty and this is a
     *                              string, every occurrence of it is removed.
     *                              </p>
     * @param string|string[] $to   [optional] <p>
     *                              The replacements. A string is split into its characters; if "$from" and
     *                              "$to" differ in length, the longer one is truncated.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if the "from" and "to" lists cannot be combined into a map
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // both lists need the same size: cut the longer one
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep "$from" untouched, so that the error message below can still show it
            $replace_pairs = \array_combine($from, $to);
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            return \strtr($str, $replace_pairs);
        }

        // "$to" is an empty string at this point
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        // "$from" is already a "search => replace" map
        return \strtr($str, $from);
    }
11844
11845
    /**
     * Return the width of a string.
     *
     * INFO: in the vanilla-PHP fallback, characters from the listed East Asian wide ranges count as
     *       two columns and every other character counts as one.
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The width of the string, 0 for an empty string.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // the two most common charsets don't need to be normalized
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the regex below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide chars and let "preg_replace()" count them
        $wide_chars = 0;
        $narrow_part = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_chars);

        // wide chars count twice, the remaining chars once
        return (2 * $wide_chars) + (int) self::strlen($narrow_part);
    }
11904
11905
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string,
     *                             null === up to the end of the string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      <i>str</i>, a <i>length</i> of 0 or an <i>offset</i> behind the end of the
     *                      string. <b>FALSE</b> is returned if the length of <i>str</i> cannot be
     *                      determined.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length" can't be 0 here, so only "null" is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? (int) $str_length;

        //
        // fallback via mbstring for every other encoding
        //

        // UTF-8 + mbstring was already handled above, so this is the non UTF-8 case:
        // the charset must be passed on, otherwise the string would be cut as if it was UTF-8
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr($str, $offset, $length, $encoding);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make sting again
        return \implode('', \array_slice($array, $offset, $length));
    }
12064
12065
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. With null, str1 is used
     *                                     from the offset up to its end. In both cases str2 is cut to the
     *                                     length of the resulting part of str1.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // without an offset and a length the whole strings are compared
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // compare only as many chars as are left in "$str1"
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // "$str1" was cut in "$encoding", so its length must be measured in the same charset
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12126
12127
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. null === search up to the end of the haystack, a negative
     *                             value is handled like the length parameter of "mb_substr()".
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences; 0 if the length is 0; false if the haystack or
     *                   the needle is empty or if the length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // cut the haystack down to the requested window; "$length" can't be 0 here,
        // so "!== null" also covers a negative length in combination with a zero offset
        if ($offset || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the (non-overlapping) regex matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12216
12217
    /**
     * Count the number of substring occurrences (byte-wise variant of "substr_count()").
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string, 0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // no "mbstring.func_overload": the native function can be used directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // from here on the overload is active: cut the haystack by hand first
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                !Bootup::is_php('7.1') // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            /** @var false|string $haystack_part - needed for PhpStan (stubs error) */
            $haystack_part = \substr($haystack, $offset, $length);
            $haystack = $haystack_part === false ? '' : (string) $haystack_part;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
12298
12299
    /**
12300
     * Returns the number of occurrences of $substring in the given string.
12301
     * By default, the comparison is case-sensitive, but can be made insensitive
12302
     * by setting $case_sensitive to false.
12303
     *
12304
     * @param string $str            <p>The input string.</p>
12305
     * @param string $substring      <p>The substring to search for.</p>
12306
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
12307
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
12308
     *
12309
     * @psalm-pure
12310
     *
12311
     * @return int
12312
     */
12313 15
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        // An empty input or an empty search term never matches.
        if ($substring === '' || $str === '') {
            return 0;
        }

        // "UTF-8" is the fast path: no encoding normalization, no explicit
        // encoding argument for the "mb_" functions.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($case_sensitive) {
            return $is_utf8
                ? (int) \mb_substr_count($str, $substring)
                : (int) \mb_substr_count($str, $substring, $encoding);
        }

        // Case-insensitive: bring both sides to a common case before counting.
        if ($is_utf8) {
            $upper_str = \mb_strtoupper($str);
            $upper_substring = \mb_strtoupper($substring);

            return (int) \mb_substr_count($upper_str, $upper_substring);
        }

        $folded_str = self::strtocasefold($str, true, false, $encoding, null, false);
        $folded_substring = self::strtocasefold($substring, true, false, $encoding, null, false);

        return (int) \mb_substr_count($folded_str, $folded_substring, $encoding);
    }
12346
12347
    /**
12348
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
12349
     *
12350
     * EXAMPLE: <code>
12351
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
12352
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
12353
     * </code>
12354
     *
12355
     * @param string $haystack <p>The string to search in.</p>
12356
     * @param string $needle   <p>The substring to search for.</p>
12357
     *
12358
     * @psalm-pure
12359
     *
12360
     * @return string
12361
     *                <p>Return the sub-string.</p>
12362
     */
12363 2
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // With an empty haystack or an empty needle there is nothing to strip.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // No (case-insensitive) prefix match: hand the input back untouched.
        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // Drop as many characters as the needle is long.
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12379
12380
    /**
12381
     * Get part of a string process in bytes.
12382
     *
12383
     * @param string   $str    <p>The string being checked.</p>
12384
     * @param int      $offset <p>The first position used in str.</p>
12385
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
12386
     *
12387
     * @psalm-pure
12388
     *
12389
     * @return false|string
12390
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
12391
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
12392
     *                      characters long, <b>FALSE</b> will be returned.
12393
     */
12394 1
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string (or an explicitly requested empty window)
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            // "Up to the end of the string": leave the length argument out instead of
            // passing the 32-bit sentinel 2147483647, which would cut the result off
            // at 2 GiB on 64-bit builds.
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
12413
12414
    /**
12415
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
12416
     *
12417
     * EXAMPLE: <code>
12418
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12419
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
12420
     * </code>
12421
     *
12422
     * @param string $haystack <p>The string to search in.</p>
12423
     * @param string $needle   <p>The substring to search for.</p>
12424
     *
12425
     * @psalm-pure
12426
     *
12427
     * @return string
12428
     *                <p>Return the sub-string.</p>
12429
     */
12430 2
    public static function substr_iright(string $haystack, string $needle): string
    {
        // With an empty haystack or an empty needle there is nothing to strip.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // No (case-insensitive) suffix match: hand the input back untouched.
        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // Keep everything except the last "needle length" characters.
        $chars_to_keep = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $chars_to_keep);
    }
12446
12447
    /**
12448
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
12449
     *
12450
     * EXAMPLE: <code>
12451
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
12452
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
12453
     * </code>
12454
     *
12455
     * @param string $haystack <p>The string to search in.</p>
12456
     * @param string $needle   <p>The substring to search for.</p>
12457
     *
12458
     * @psalm-pure
12459
     *
12460
     * @return string
12461
     *                <p>Return the sub-string.</p>
12462
     */
12463 2
    public static function substr_left(string $haystack, string $needle): string
    {
        // With an empty haystack or an empty needle there is nothing to strip.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // No (case-sensitive) prefix match: hand the input back untouched.
        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // Drop as many characters as the needle is long.
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12479
12480
    /**
12481
     * Replace text within a portion of a string.
12482
     *
12483
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
12484
     *
12485
     * source: https://gist.github.com/stemar/8287074
12486
     *
12487
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
12488
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
12489
     * @param int|int[]       $offset      <p>
12490
     *                                     If start is positive, the replacing will begin at the start'th offset
12491
     *                                     into string.
12492
     *                                     <br><br>
12493
     *                                     If start is negative, the replacing will begin at the start'th character
12494
     *                                     from the end of string.
12495
     *                                     </p>
12496
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
12497
     *                                     portion of string which is to be replaced. If it is negative, it
12498
     *                                     represents the number of characters from the end of string at which to
12499
     *                                     stop replacing. If it is not given, then it will default to strlen(
12500
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
12501
     *                                     length is zero then this function will have the effect of inserting
12502
     *                                     replacement into string at the given start offset.</p>
12503
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
12504
     *
12505
     * @psalm-pure
12506
     *
12507
     * @return string|string[]
12508
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
12509
     */
12510 10
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        //
        // array input: normalize all arguments to lists and process string by string
        //

        if (\is_array($str)) {
            $num = \count($str);

            // the replacement: a scalar is repeated for every input string
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: "null" becomes 0 (= insert) in array mode,
            // non-integer entries fall back to the number of input strings
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, one per input string; the encoding is passed along
            // explicitly, otherwise every element would be handled as "UTF-8"
            return \array_map(
                [self::class, 'substr_replace'],
                $str,
                $replacement,
                $offset,
                $length,
                \array_fill(0, $num, $encoding)
            );
        }

        //
        // single string input
        //

        // only the first replacement is used for a single string
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, string length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, "null" means "up to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into lists of single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12635
12636
    /**
12637
     * Removes a suffix ($needle) from the end of the string ($haystack).
12638
     *
12639
     * EXAMPLE: <code>
12640
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12641
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
12642
     * </code>
12643
     *
12644
     * @param string $haystack <p>The string to search in.</p>
12645
     * @param string $needle   <p>The substring to search for.</p>
12646
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
12647
     *
12648
     * @psalm-pure
12649
     *
12650
     * @return string
12651
     *                <p>Return the sub-string.</p>
12652
     */
12653 2
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // With an empty haystack or an empty needle there is nothing to strip.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // Byte-wise, case-sensitive suffix check.
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        // "UTF-8" fast path: use the "mb_" functions directly.
        if ($encoding === 'UTF-8') {
            $chars_to_keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $chars_to_keep);
        }

        // Any other encoding goes through the encoding-aware helpers.
        $chars_to_keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $chars_to_keep, $encoding);
    }
12685
12686
    /**
12687
     * Returns a case swapped version of the string.
12688
     *
12689
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
12690
     *
12691
     * @param string $str        <p>The input string.</p>
12692
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12693
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12694
     *
12695
     * @psalm-pure
12696
     *
12697
     * @return string
12698
     *                <p>Each character's case swapped.</p>
12699
     */
12700 6
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // Optionally clean the input before any case conversion is done.
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // "UTF-8" uses the "mb_" functions directly, everything else the
        // encoding-aware helpers of this class.
        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Byte-wise XOR: where both variants agree the bytes cancel out and the
        // original byte stays; where they differ the original byte flips to the
        // other variant. NOTE: this relies on both variants having the same byte
        // length as the input, because a string XOR stops at the shortest operand.
        return (string) ($lower ^ $upper ^ $str);
    }
12718
12719
    /**
12720
     * Checks whether symfony-polyfills are used.
12721
     *
12722
     * @psalm-pure
12723
     *
12724
     * @return bool
12725
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
12726
     *
12727
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
12728
     */
12729
    public static function symfony_polyfill_used(): bool
    {
        // A function that exists although its extension is not loaded
        // has to be provided by a polyfill.
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12746
12747
    /**
12748
     * @param string $str
12749
     * @param int    $tab_length
12750
     *
12751
     * @psalm-pure
12752
     *
12753
     * @return string
12754
     */
12755 6
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // Every tab character becomes a run of "$tab_length" spaces.
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12767
12768
    /**
12769
     * Converts the first character of each word in the string to uppercase
12770
     * and all other chars to lowercase.
12771
     *
12772
     * @param string      $str                           <p>The input string.</p>
12773
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12774
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12775
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12776
     *                                                   tr</p>
12777
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12778
     *                                                   -> ß</p>
12779
     *
12780
     * @psalm-pure
12781
     *
12782
     * @return string
12783
     *                <p>A string with all characters of $str being title-cased.</p>
12784
     */
12785 5
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Optionally clean the input before any case conversion is done.
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Language specific rules or length preservation are delegated to "str_titleize()".
        $needs_special_handling = $lang !== null || $try_to_keep_the_string_length;
        if ($needs_special_handling) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // Plain case: "mb_convert_case()" does the whole job.
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12822
12823
    /**
12824
     * alias for "UTF8::to_ascii()"
12825
     *
12826
     * @param string $str
12827
     * @param string $subst_chr
12828
     * @param bool   $strict
12829
     *
12830
     * @psalm-pure
12831
     *
12832
     * @return string
12833
     *
12834
     * @see        UTF8::to_ascii()
12835
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
12836
     */
12837 7
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        // Deprecated camelCase alias: forwards all arguments unchanged to "to_ascii()".
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12844
12845
    /**
12846
     * alias for "UTF8::to_iso8859()"
12847
     *
12848
     * @param string|string[] $str
12849
     *
12850
     * @psalm-pure
12851
     *
12852
     * @return string|string[]
12853
     *
12854
     * @see        UTF8::to_iso8859()
12855
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12856
     */
12857 2
    public static function toIso8859($str)
    {
        // Deprecated camelCase alias: forwards the input unchanged to "to_iso8859()".
        $converted = self::to_iso8859($str);

        return $converted;
    }
12861
12862
    /**
12863
     * alias for "UTF8::to_latin1()"
12864
     *
12865
     * @param string|string[] $str
12866
     *
12867
     * @psalm-pure
12868
     *
12869
     * @return string|string[]
12870
     *
12871
     * @see        UTF8::to_iso8859()
12872
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12873
     */
12874 2
    public static function toLatin1($str)
    {
        // Deprecated alias: forwards the input unchanged to "to_iso8859()".
        $converted = self::to_iso8859($str);

        return $converted;
    }
12878
12879
    /**
12880
     * alias for "UTF8::to_utf8()"
12881
     *
12882
     * @param string|string[] $str
12883
     *
12884
     * @psalm-pure
12885
     *
12886
     * @return string|string[]
12887
     *
12888
     * @see        UTF8::to_utf8()
12889
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
12890
     */
12891 2
    public static function toUTF8($str)
    {
        // Deprecated camelCase alias: forwards the input unchanged to "to_utf8()".
        $converted = self::to_utf8($str);

        return $converted;
    }
12895
12896
    /**
12897
     * Convert a string into ASCII.
12898
     *
12899
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
12900
     *
12901
     * @param string $str     <p>The input string.</p>
12902
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
12903
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
12904
     *                        performance</p>
12905
     *
12906
     * @psalm-pure
12907
     *
12908
     * @return string
12909
     */
12910 37
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // The transliteration itself is done by the "ASCII" helper class.
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12917
12918
    /**
12919
     * @param bool|int|string $str
12920
     *
12921
     * @psalm-param bool|int|numeric-string $str
12922
     *
12923
     * @psalm-pure
12924
     *
12925
     * @return bool
12926
     */
12927 19
    public static function to_boolean($str): bool
    {
        // init
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $known_words = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // All keys are lower-case, so one lower-cased lookup covers the exact
        // match as well as the case-insensitive match.
        $mapped = $known_words[\strtolower($value)] ?? null;
        if ($mapped !== null) {
            return $mapped;
        }

        // Numbers are "true" only if they are greater than zero.
        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        // Anything else: truthiness of the trimmed string.
        return (bool) \trim($value);
    }
12963
12964
    /**
12965
     * Convert given string to safe filename (and keep string case).
12966
     *
12967
     * @param string $str
12968
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
12969
     *                                  simply replaced with hyphen.
12970
     * @param string $fallback_char
12971
     *
12972
     * @psalm-pure
12973
     *
12974
     * @return string
12975
     */
12976 1
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // The conversion itself is done by the "ASCII" helper class.
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12987
12988
    /**
12989
     * Convert a string into "ISO-8859"-encoding (Latin-1).
12990
     *
12991
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
12992
     *
12993
     * @param string|string[] $str
12994
     *
12995
     * @psalm-pure
12996
     *
12997
     * @return string|string[]
12998
     */
12999 8
    public static function to_iso8859($str)
    {
        // Arrays are converted element by element, keys are kept.
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        // Nothing to decode in an empty string.
        return $input === '' ? '' : self::utf8_decode($input);
    }
13016
13017
    /**
13018
     * alias for "UTF8::to_iso8859()"
13019
     *
13020
     * @param string|string[] $str
13021
     *
13022
     * @psalm-pure
13023
     *
13024
     * @return string|string[]
13025
     *
13026
     * @see        UTF8::to_iso8859()
13027
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
13028
     */
13029 2
    public static function to_latin1($str)
    {
        // Deprecated alias: forwards the input unchanged to "to_iso8859()".
        $converted = self::to_iso8859($str);

        return $converted;
    }
13033
13034
    /**
13035
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13036
     *
13037
     * <ul>
13038
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
13039
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13040
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
13041
     * case.</li>
13042
     * </ul>
13043
     *
13044
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
13045
     *
13046
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
13047
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13048
     *
13049
     * @psalm-pure
13050
     *
13051
     * @return string|string[]
13052
     *                         <p>The UTF-8 encoded string</p>
13053
     *
13054
     * @template TToUtf8
13055
     * @psalm-param TToUtf8 $str
13056
     * @psalm-return TToUtf8
13057
     *
13058
     * @noinspection SuspiciousBinaryOperationInspection
13059
     */
13060 44
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        // A single value is converted directly.
        if (!\is_array($str)) {
            /** @psalm-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        // Arrays are converted element by element, keys are kept.
        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        return $str;
    }
13075
13076
    /**
13077
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13078
     *
13079
     * <ul>
13080
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
13081
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13082
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
13083
     * case.</li>
13084
     * </ul>
13085
     *
13086
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
13087
     *
13088
     * @param string $str                        <p>Any string.</p>
13089
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13090
     *
13091
     * @psalm-pure
13092
     *
13093
     * @return string
13094
     *                <p>The UTF-8 encoded string</p>
13095
     *
13096
     * @noinspection SuspiciousBinaryOperationInspection
13097
     */
13098 44
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $max = \strlen($str);
        $buf = '';

        // Walk the input byte by byte: byte sequences that already look like UTF-8 are copied
        // unchanged, every other high byte is passed through "to_utf8_convert_helper()".
        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];

            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8

                    // a missing follow-up byte is replaced by "\x00", which never passes the range check
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];

                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2;
                        ++$i;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3;
                        $i += 2;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3 . $c4;
                        $i += 3;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } else { // doesn't look like UTF8, but should be converted

                    $buf .= self::to_utf8_convert_helper($c1);
                }
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion (stray byte in the range 0x80 - 0xBF)

                $buf .= self::to_utf8_convert_helper($c1);
            } else { // it doesn't need conversion

                $buf .= $c1;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // group 3 is only set for the single "\uXXXX" alternative
                    $cp = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: combine the high (group 1) and low (group 2) surrogate
                    // http://unicode.org/faq/utf_bom.html#utf16-4
                    $cp = ((int) \hexdec($matches[1]) << 10)
                          + (int) \hexdec($matches[2])
                          + 0x10000
                          - (0xD800 << 10)
                          - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($cp < 0x80) {
                    return (string) self::chr($cp);
                }

                if ($cp < 0xA0) {
                    // 0x80 - 0x9F: build the two bytes of the sequence by hand
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
                }

                return self::decimal_to_chr($cp);
            },
            $buf
        );

        // "preg_replace_callback()" returns null on error
        if ($buf === null) {
            return '';
        }

        // optionally decode html-entities as the last step
        if ($decode_html_entity_to_utf8) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
13209
13210
    /**
     * Returns the given string as an integer, or null if the string isn't numeric.
     *
     * INFO: "numeric" is decided by "is_numeric()", the value is then cast via "(int)".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        if (\is_numeric($str)) {
            return (int) $str;
        }

        return null;
    }
13228
13229
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and do not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // scalars that have a lossless, well-defined string form (booleans are not accepted)
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects are only accepted if they can be cast to a string
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource and objects without "__toString()"
        return null;
    }
13274
13275
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // no delimiter here: the pattern is handed to "mb_ereg_replace()" without delimiters
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without "mbstring"
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13321
13322
    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with its first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // clean the input first, so that the "first char" below is taken from the cleaned string
            $str = self::clean($str);
        }

        // the plain "mb_strtoupper()" is only used if no language / length special-casing was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $str_part_two = (string) \mb_substr($str, 1);

            if ($use_mb_functions) {
                $str_part_one = \mb_strtoupper(
                    (string) \mb_substr($str, 0, 1)
                );
            } else {
                $str_part_one = self::strtoupper(
                    (string) \mb_substr($str, 0, 1),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_part_two = (string) self::substr($str, 1, null, $encoding);

            if ($use_mb_functions) {
                $str_part_one = \mb_strtoupper(
                    (string) \mb_substr($str, 0, 1, $encoding),
                    $encoding
                );
            } else {
                $str_part_one = self::strtoupper(
                    (string) self::substr($str, 0, 1, $encoding),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            }
        }

        return $str_part_one . $str_part_two;
    }
13398
13399
    /**
     * alias for "UTF8::ucfirst()"
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Passed through to "UTF8::ucfirst()".</p>
     * @param bool   $clean_utf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8);
    }
13420
13421
    /**
     * Uppercase for all words in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison on purpose: the string "0" is falsy, but it is not empty
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the native "ucwords()" is only an option if there are no extra word-chars and no exceptions
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // iterate by value: the elements are never modified, so no reference is needed
        foreach ($words as $word) {
            // skip empty chunks only (a word like "0" must be kept)
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
13489
13490
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: nothing that could be url-, entity- or unicode-escaped
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // One decode pass is always executed; with "$multi_decode" the pass is repeated
        // until the string does not change anymore.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $str_compare !== $str);

        return $str;
    }
13565
13566
    /**
     * Return a array with "urlencoded"-win1252 -> UTF-8
     *
     * INFO: Non-printable characters are written as escape sequences, so that they
     *       can't get lost in editors or while copying the source.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>Key: the urlencoded byte (e.g. "%FC"), value: the character as UTF-8.</p>
     *
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => "\x7F", // DEL (control character)
            '%80' => '€', // 0x80 is the euro sign in WINDOWS-1252 (was mapped to "`" before)
            '%81' => '', // NOTE(review): undefined in WINDOWS-1252; empty in the reviewed listing - confirm no invisible char was lost
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '', // NOTE(review): undefined in WINDOWS-1252 (see '%81')
            '%8E' => 'Ž',
            '%8F' => '', // NOTE(review): undefined in WINDOWS-1252 (see '%81')
            '%90' => '', // NOTE(review): undefined in WINDOWS-1252 (see '%81')
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '', // NOTE(review): undefined in WINDOWS-1252 (see '%81')
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => "\xC2\xA0", // NO-BREAK SPACE (U+00A0)
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => "\xC2\xAD", // SOFT HYPHEN (U+00AD)
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
13804
13805
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>Return the unchanged input, if the decoded result is not shorter than the
     *                                input (compared via "UTF8::strlen()").</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        // The string is rewritten in place: $i is the read offset, $j is the write offset.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the lead byte decides how many bytes the sequence has
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence at the end of the input: there is no second byte to read
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    // only code points below 256 can be represented in the target encoding
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, then continue like a 3-byte sequence
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (and 4-byte) sequences are replaced by the placeholder
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // every other byte is copied unchanged
                    $str[$j] = $str[$i];
            }
        }

        // cut off the unused rest of the in-place buffer
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13881
13882
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The encoded string, or an empty string if the encoding failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // NOTE(review): the native "utf8_encode()" is deprecated as of PHP 8.2
        /** @var false|string $str - the polyfill maybe return false */
        $str = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($str === false) {
            return '';
        }

        return $str;
    }
13910
13911
    /**
     * fix -> utf8-win1252 chars
     *
     * INFO: This is only an alias, the string is passed through to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        return self::fix_simple_utf8($str);
    }
13926
13927
    /**
     * Returns an array with all utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        return self::$WHITESPACE_TABLE;
    }
13942
13943
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The unchanged input if it has not more than "$limit" words, otherwise the first
     *                "$limit" words followed by "$str_add_on". An empty string if "$limit" is lower than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // match the leading whitespace + up to "$limit" words (each with its trailing whitespace)
        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        if (
            !isset($matches[0])
            ||
            // the match is always a prefix of the input, so "equal" means nothing was cut off
            $matches[0] === $str
        ) {
            return $str;
        }

        // Strip the trailing whitespace with the same (unicode-aware) "\s" that was used for matching,
        // the byte-wise "rtrim()" would keep e.g. a trailing ideographic space.
        return (string) \preg_replace('/\\s+\\z/u', '', $matches[0]) . $str_add_on;
    }
13977
13978
    /**
     * Wraps a string to a given number of characters.
     *
     * The input is split into characters via self::str_split(), a same-length
     * placeholder string is wrapped with the native \wordwrap(), and the break
     * positions found there are then mapped back onto the real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column;
     *                an empty string if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        // $break is guaranteed to be non-empty here, so explode() always
        // returns an array and needs no failure check.
        $segments = \explode($break, $str);

        // $charsArray holds the real characters (an existing $break counts as
        // ONE element); $word_split mirrors it with one placeholder byte per
        // element: '#' for a break, ' ' for a space, '?' for anything else.
        /** @var string[] $charsArray */
        $charsArray = [];
        $word_split = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $charsArray[] = $break;
                $word_split .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $charsArray[] = $char;
                $word_split .= $char === ' ' ? ' ' : '?';
            }
        }

        $str_return = '';
        $j = 0;  // cursor into $charsArray
        $b = -1; // position of the current '#' inside the wrapped placeholder
        $i = -1; // cursor into the wrapped placeholder
        $word_split = \wordwrap($word_split, $width, '#', $cut);

        $max = \mb_strlen($word_split);
        while (($b = \mb_strpos($word_split, '#', $b + 1)) !== false) {
            // Copy every real character that precedes this break marker.
            for (++$i; $i < $b; ++$i) {
                if (isset($charsArray[$j])) {
                    $str_return .= $charsArray[$j];
                    unset($charsArray[$j]);
                }
                ++$j;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($i > $max) {
                    break 2;
                }
            }

            // A marker that took the place of an original break or space
            // consumes that element; a marker inserted by a forced cut has no
            // counterpart in $charsArray and consumes nothing.
            if (
                isset($charsArray[$j])
                &&
                ($charsArray[$j] === $break || $charsArray[$j] === ' ')
            ) {
                unset($charsArray[$j++]);
            }

            $str_return .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($b > $max) {
                break;
            }
        }

        // Whatever was not consumed forms the last line.
        return $str_return . \implode('', $charsArray);
    }
14072
14073
    /**
     * Splits the string by "$delimiter" first and then word-wraps every
     * resulting piece on its own via self::wordwrap().
     *
     * When no delimiter is given the input is split on "\r\n", "\r" or "\n"
     * and the wrapped pieces are joined again with "\n".
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $pieces = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        // A failed split is treated like "no pieces".
        if ($pieces === false) {
            $pieces = [];
        }

        $wrapped_pieces = [];
        foreach ($pieces as $piece) {
            $wrapped_pieces[] = self::wordwrap($piece, $width, $break, $cut);
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_pieces) . $suffix;
    }
14126
14127
    /**
     * Returns the class-level table of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace_table = self::$WHITESPACE;

        return $whitespace_table;
    }
14139
14140
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * When PCRE UTF-8 support is flagged in self::$SUPPORT the check is a
     * single preg_match(); otherwise the bytes are walked one by one.
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // Strict mode: binary-looking input that is detected as UTF-16 or
        // UTF-32 is rejected up front.
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        $pending = 0;    // continuation octets still expected for the current sequence
        $code_point = 0; // code point assembled so far
        $seq_length = 1; // total number of octets announced by the lead octet

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the lead octet of
                // a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $seq_length = 1;
                } elseif ((0xE0 & $octet) === 0xC0) {
                    // Lead octet of a 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $pending = 1;
                    $seq_length = 2;
                } elseif ((0xF0 & $octet) === 0xE0) {
                    // Lead octet of a 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $pending = 2;
                    $seq_length = 3;
                } elseif ((0xF8 & $octet) === 0xF0) {
                    // Lead octet of a 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $pending = 3;
                    $seq_length = 4;
                } elseif ((0xFC & $octet) === 0xF8) {
                    // Lead octet of a 5 octet sequence. Such sequences are
                    // always illegal (not the shortest form, or outside the
                    // Unicode range 0-0x10FFFF); instead of resynchronizing
                    // we read on and let the end-of-sequence check reject it.
                    $code_point = ($octet & 0x03) << 24;
                    $pending = 4;
                    $seq_length = 5;
                } elseif ((0xFE & $octet) === 0xFC) {
                    // Lead octet of a 6 octet sequence, handled like the 5 octet case.
                    $code_point = ($octet & 1) << 30;
                    $pending = 5;
                    $seq_length = 6;
                } else {
                    // Neither US-ASCII nor a legal lead octet.
                    return false;
                }

                continue;
            }

            // Inside a sequence only continuation octets (10xxxxxx) are legal.
            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            // Merge the 6 payload bits at the position given by the number of
            // octets still outstanding.
            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            --$pending;
            if ($pending > 0) {
                continue;
            }

            // The sequence is complete: check for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($seq_length === 2 && $code_point < 0x0080)
                ||
                ($seq_length === 3 && $code_point < 0x0800)
                ||
                ($seq_length === 4 && $code_point < 0x10000)
                ||
                ($seq_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // Reset the state for the next character.
            $pending = 0;
            $code_point = 0;
            $seq_length = 1;
        }

        // A sequence that is still open at the end of the input is invalid.
        return $pending === 0;
    }
14295
14296
    /**
     * Applies the case-fold replacement tables to a string.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied; the full
     * table ("caseFolding_full" data file, loaded once and kept in a static
     * variable) is applied on top only if requested.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;

        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // Common case folding: the search / replace roles swap with the direction.
        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Full case folding: entry [0] is replaced by entry [1] for lowercase,
        // and the other way round otherwise.
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
14349
14350
    /**
     * Loads a data file from "/data/*.php" (relative to this file) and
     * returns whatever that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
14368
14369
    /**
     * Fills the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file if needed, orders it by key length (longest
     * key first) and derives the key, value and "reversible" placeholder caches.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // Already initialised by an earlier call.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                // descending by byte length
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // One placeholder per key, built from the key's CRC32 checksum and
        // the reversed digits of that checksum.
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14404
14405
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true only if MB_OVERLOAD_STRING is defined and its bit is set in 'mbstring.func_overload'.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        // Without the constant there is nothing that could be overloaded.
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
14426
14427
    /**
     * Filters a list of strings and returns the survivors re-indexed from 0.
     *
     * @param array    $strings
     * @param bool     $remove_empty_values <p>Drop values that are empty after trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop values whose character length is less than or
     *                                      equal to this number; null disables the check.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        $kept = \array_filter(
            $strings,
            static function ($candidate) use ($remove_empty_values, $remove_short_values): bool {
                // The length check runs first ...
                if (
                    $remove_short_values !== null
                    &&
                    \mb_strlen($candidate) <= $remove_short_values
                ) {
                    return false;
                }

                // ... then the optional "blank after trim" check.
                return !($remove_empty_values && \trim($candidate) === '');
            }
        );

        // array_filter() keeps the original keys, the result is a plain list.
        return \array_values($kept);
    }
14468
14469
    /**
     * rxClass
     *
     * Builds a regular-expression fragment that matches any of the characters
     * in $s: normally one bracketed character class, or a non-capturing
     * alternation if self::str_split() returns multi-character pieces.
     * Results are memoized per ($s, $class) pair for the lifetime of the process.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the character class, taken as-is (not quoted).</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * Two-level cache ($class => $s => result), so that different
         * argument pairs can never share a key.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,array<string,string>>
         */
        static $RX_CLASS_CACHE = [];

        if (isset($RX_CLASS_CACHE[$class][$s])) {
            return $RX_CLASS_CACHE[$class][$s];
        }

        // Element 0 collects the character class; any further elements are
        // alternatives that do not fit into a class.
        /** @var string[] $class_array */
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // A literal dash goes to the very front of the class, where
                // it cannot be read as a range operator.
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // At most two bytes long: append it regex-quoted.
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // One character of three or more bytes: append it unquoted.
                $class_array[0] .= $char;
            } else {
                // More than one character: keep it as a separate alternative.
                $class_array[] = $char;
            }
        }

        // NOTE: this is a truthiness test, so a class consisting only of "0"
        // stays unbracketed; as a pattern "0" matches the same as "[0]".
        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$class][$s] = $return;

        return $return;
    }
14527
14528
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names by $delimiter, upper-cases the first character of every
     * part via self::ucfirst() and joins the parts again. A part is left
     * untouched if it
     *  - equals one of the listed particles (e.g. "von", "de"),
     *  - starts with one of the listed prefixes (e.g. "mc", "d'"), or
     *  - starts with one of the listed particles while the delimiter is "-".
     *
     * NOTE(review): $encoding is accepted but never used in this method;
     * self::ucfirst() is called without it. Confirm whether it should be forwarded.
     *
     * @param string $names
     * @param string $delimiter
     * @param string $encoding
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Byte-wise "does $name start with any of $beginnings?" check.
        $starts_with_any = static function (string $name, array $beginnings): bool {
            foreach ($beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    return true;
                }
            }

            return false;
        };

        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            if ($delimiter === '-' && $starts_with_any($name, $special_cases['names'])) {
                continue;
            }

            if ($starts_with_any($name, $special_cases['prefixes'])) {
                continue;
            }

            $name = self::ucfirst($name);
        }
        // Drop the reference so that $name can not alias the last element any longer.
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
14628
14629
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of non-spacing marks
     * (\p{Mn}) is removed afterwards, e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, or null if the input could not be normalized
     *                     or the replacement failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure with false, which must not be passed
        // on to preg_replace() as its subject.
        if ($decomposed === false) {
            return null;
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
14647
14648
    /**
     * Converts one input value into its UTF-8 byte sequence.
     *
     * Values found in the Windows-1252 mapping table are replaced by the
     * mapped string; everything else is encoded as a two-byte sequence.
     * NOTE(review): presumably $input is a single byte (it is used as a key
     * into the "ord" table) — confirm against the callers.
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // Lazy-load the lookup tables on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The explicit cast keeps the table offset an integer; a bare
            // "$ordC1 / 64" is a float and would be truncated implicitly.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // The bitwise operators on strings are intended here: they work
            // byte by byte and build the continuation byte (low six bits
            // of the input, high bit set).
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
14687
14688
    /**
     * Rewrites non-standard "%uXXXX" escapes (3 or 4 hex digits) into
     * hexadecimal HTML entities ("&#xXXXX;"); everything else is left as it is.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        // Nothing to rewrite (or the match failed): return the input as-is.
        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
14706
}
14707