UTF8::between()   B
last analyzed

Complexity

Conditions 8
Paths 6

Size

Total Lines 48
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 22
CRAP Score 8

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 8
eloc 28
nc 6
nop 5
dl 0
loc 48
ccs 22
cts 22
cp 1
crap 8
rs 8.4444
c 1
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * ## 🇷🇺 Русским гражданам
9
 * В Украине сейчас идет война. Силами РФ наносятся удары по гражданской инфраструктуре в [Харькове][1], [Киеве][2], [Чернигове][3], [Сумах][4], [Ирпене][5] и десятках других городов. Гибнут люди - и гражданское население, и военные, в том числе российские призывники, которых бросили воевать. Чтобы лишить собственный народ доступа к информации, правительство РФ запретило называть войну войной, закрыло независимые СМИ и принимает сейчас ряд диктаторских законов. Эти законы призваны заткнуть рот всем, кто против войны. За обычный призыв к миру сейчас можно получить несколько лет тюрьмы.
10
 * Не молчите! Молчание - знак вашего согласия с политикой российского правительства.
11
 *  **Вы можете сделать выбор НЕ МОЛЧАТЬ.**
12
 * ---
13
 * ## 🇺🇸 To people of Russia
14
 * There is a war in Ukraine right now. The forces of the Russian Federation are attacking civilian infrastructure in [Kharkiv][1], [Kyiv][2], [Chernihiv][3], [Sumy][4], [Irpin][5] and dozens of other cities. People are dying – both civilians and military servicemen, including Russian conscripts who were thrown into the fighting. In order to deprive its own people of access to information, the government of the Russian Federation has forbidden calling a war a war, shut down independent media and is passing a number of dictatorial laws. These laws are meant to silence all those who are against war. You can be jailed for multiple years for simply calling for peace.
15
 * Do not be silent! Silence is a sign that you accept the Russian government's policy.
16
 * **You can choose NOT TO BE SILENT.**
17
 * ---
18
 * - [1] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/P7K2MSZDGFMIJPDD7CI2GIROJI.jpg "Kharkiv under attack"
19
 * - [2] https://gdb.voanews.com/01bd0000-0aff-0242-fad0-08d9fc92c5b3_cx0_cy5_cw0_w1023_r1_s.jpg "Kyiv under attack"
20
 * - [3] https://ichef.bbci.co.uk/news/976/cpsprodpb/163DD/production/_123510119_hi074310744.jpg "Chernihiv under attack"
21
 * - [4] https://www.youtube.com/watch?v=8K-bkqKKf2A "Sumy under attack"
22
 * - [5] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/K4MTMLEHTRKGFK3GSKAT4GR3NE.jpg "Irpin under attack"
23
 *
24
 * @psalm-immutable
25
 */
26
final class UTF8
27
{
28
    /**
29
     * Bom => Byte-Length
30
     *
31
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
32
     *
33
     * @var array<string, int>
34
     */
35
    private static $BOM = [
36
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
37
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
38
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
39
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
40
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
41
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
42
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
43
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
44
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
45
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
46
    ];
47
48
    /**
49
     * Numeric code point => UTF-8 Character
50
     *
51
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
52
     *
53
     * @var array<int, string>
54
     */
55
    private static $WHITESPACE = [
56
        // NULL Byte
57
        0 => "\x0",
58
        // Tab
59
        9 => "\x9",
60
        // New Line
61
        10 => "\xa",
62
        // Vertical Tab
63
        11 => "\xb",
64
        // Carriage Return
65
        13 => "\xd",
66
        // Ordinary Space
67
        32 => "\x20",
68
        // NO-BREAK SPACE
69
        160 => "\xc2\xa0",
70
        // OGHAM SPACE MARK
71
        5760 => "\xe1\x9a\x80",
72
        // MONGOLIAN VOWEL SEPARATOR
73
        6158 => "\xe1\xa0\x8e",
74
        // EN QUAD
75
        8192 => "\xe2\x80\x80",
76
        // EM QUAD
77
        8193 => "\xe2\x80\x81",
78
        // EN SPACE
79
        8194 => "\xe2\x80\x82",
80
        // EM SPACE
81
        8195 => "\xe2\x80\x83",
82
        // THREE-PER-EM SPACE
83
        8196 => "\xe2\x80\x84",
84
        // FOUR-PER-EM SPACE
85
        8197 => "\xe2\x80\x85",
86
        // SIX-PER-EM SPACE
87
        8198 => "\xe2\x80\x86",
88
        // FIGURE SPACE
89
        8199 => "\xe2\x80\x87",
90
        // PUNCTUATION SPACE
91
        8200 => "\xe2\x80\x88",
92
        // THIN SPACE
93
        8201 => "\xe2\x80\x89",
94
        // HAIR SPACE
95
        8202 => "\xe2\x80\x8a",
96
        // LINE SEPARATOR
97
        8232 => "\xe2\x80\xa8",
98
        // PARAGRAPH SEPARATOR
99
        8233 => "\xe2\x80\xa9",
100
        // NARROW NO-BREAK SPACE
101
        8239 => "\xe2\x80\xaf",
102
        // MEDIUM MATHEMATICAL SPACE
103
        8287 => "\xe2\x81\x9f",
104
        // HALFWIDTH HANGUL FILLER
105
        65440 => "\xef\xbe\xa0",
106
        // IDEOGRAPHIC SPACE
107
        12288 => "\xe3\x80\x80",
108
    ];
109
110
    /**
111
     * @var array<string, string>
112
     */
113
    private static $WHITESPACE_TABLE = [
114
        'SPACE'                     => "\x20",
115
        'NO-BREAK SPACE'            => "\xc2\xa0",
116
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
117
        'EN QUAD'                   => "\xe2\x80\x80",
118
        'EM QUAD'                   => "\xe2\x80\x81",
119
        'EN SPACE'                  => "\xe2\x80\x82",
120
        'EM SPACE'                  => "\xe2\x80\x83",
121
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
122
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
123
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
124
        'FIGURE SPACE'              => "\xe2\x80\x87",
125
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
126
        'THIN SPACE'                => "\xe2\x80\x89",
127
        'HAIR SPACE'                => "\xe2\x80\x8a",
128
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
129
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
130
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
131
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
132
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
133
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
134
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
135
    ];
136
137
    /**
138
     * @var array
139
     *
140
     * @phpstan-var array{upper: string[], lower: string[]}
141
     */
142
    private static $COMMON_CASE_FOLD = [
143
        'upper' => [
144
            'µ',
145
            'ſ',
146
            "\xCD\x85",
147
            'ς',
148
            'ẞ',
149
            "\xCF\x90",
150
            "\xCF\x91",
151
            "\xCF\x95",
152
            "\xCF\x96",
153
            "\xCF\xB0",
154
            "\xCF\xB1",
155
            "\xCF\xB5",
156
            "\xE1\xBA\x9B",
157
            "\xE1\xBE\xBE",
158
        ],
159
        'lower' => [
160
            'μ',
161
            's',
162
            'ι',
163
            'σ',
164
            'ß',
165
            'β',
166
            'θ',
167
            'φ',
168
            'π',
169
            'κ',
170
            'ρ',
171
            'ε',
172
            "\xE1\xB9\xA1",
173
            'ι',
174
        ],
175
    ];
176
177
    /**
178
     * @var array
179
     *
180
     * @phpstan-var array<string, mixed>
181
     */
182
    private static $SUPPORT = [];
183
184
    /**
185
     * @var string[]|null
186
     *
187
     * @phpstan-var array<string, string>|null
188
     */
189
    private static $BROKEN_UTF8_FIX;
190
191
    /**
192
     * @var string[]|null
193
     *
194
     * @phpstan-var array<int, string>|null
195
     */
196
    private static $WIN1252_TO_UTF8;
197
198
    /**
199
     * @var string[]|null
200
     *
201
     * @phpstan-var array<int ,string>|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var string[]|null
207
     *
208
     * @phpstan-var array<string>|null
209
     */
210
    private static $ENCODINGS;
211
212
    /**
213
     * @var int[]|null
214
     *
215
     * @phpstan-var array<string ,int>|null
216
     */
217
    private static $ORD;
218
219
    /**
220
     * @var string[]|null
221
     *
222
     * @phpstan-var array<string, string>|null
223
     */
224
    private static $EMOJI;
225
226
    /**
227
     * @var string[]|null
228
     *
229
     * @phpstan-var array<string>|null
230
     */
231
    private static $EMOJI_VALUES_CACHE;
232
233
    /**
234
     * @var string[]|null
235
     *
236
     * @phpstan-var array<string>|null
237
     */
238
    private static $EMOJI_KEYS_CACHE;
239
240
    /**
241
     * @var string[]|null
242
     *
243
     * @phpstan-var array<string>|null
244
     */
245
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
246
247
    /**
248
     * @var string[]|null
249
     *
250
     * @phpstan-var array<int, string>|null
251
     */
252
    private static $CHR;
253
254
    /**
     * Creates an instance of this helper class.
     *
     * The constructor takes no arguments and performs no work.
     */
    public function __construct()
    {
        // intentionally left empty
    }
260
261
    /**
     * Fetch the single character located at a given offset, comparable to $str[1] but multi-byte aware.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>The string to read from.</p>
     * @param int    $pos      <p>Zero-based character offset; a negative offset yields an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character found at that offset, or an empty string.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing to look up: offset before the start, or no input at all
        if ($pos < 0 || $str === '') {
            return '';
        }

        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
287
288
    /**
     * Returns the string with a leading UTF-8 BOM.
     *
     * INFO: A string for which UTF8::string_has_bom() already reports a BOM is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string, starting with a BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str) ? $str : self::bom() . $str;
    }
310
311
    /**
     * Changes the case of all keys in an array.
     *
     * The input array is not modified; a new array is built. When two keys
     * become identical after the conversion, the later entry overwrites the
     * earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other value is
     *                                       treated as <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                  <p>An array with its keys lower- or uppercased, the values are kept as they are.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // everything that is not explicitly CASE_UPPER falls back to lower-casing
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: the elements are only read, so no reference is needed
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper($key, $encoding)
                : self::strtolower($key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
348
349
    /**
     * Extracts the text enclosed by the first $start delimiter (searched from
     * $offset) and the next $end delimiter that follows it.
     *
     * An empty string is returned when either delimiter is not found or when
     * nothing lies between them.
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: go through the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $start_at = self::strpos($str, $start, $offset, $encoding);
            if ($start_at === false) {
                return '';
            }

            $content_at = $start_at + (int) self::strlen($start, $encoding);
            $end_at = self::strpos($str, $end, $content_at, $encoding);
            if ($end_at === false || $end_at === $content_at) {
                return '';
            }

            return (string) self::substr($str, $content_at, $end_at - $content_at, $encoding);
        }

        // UTF-8 path: call the "mb_" functions directly
        $start_at = \mb_strpos($str, $start, $offset);
        if ($start_at === false) {
            return '';
        }

        // first character after the start delimiter
        $content_at = $start_at + (int) \mb_strlen($start);
        $end_at = \mb_strpos($str, $end, $content_at);
        if ($end_at === false || $end_at === $content_at) {
            return '';
        }

        return (string) \mb_substr($str, $content_at, $end_at - $content_at);
    }
414
415
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * The input is read as one big binary number: leading zero bits carry no
     * value and are dropped, the remaining bits are decoded byte by byte.
     * An input without any set bit results in an empty string.
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // only binary digits count; leading zeros have no numeric value
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to whole bytes, so that e.g. "1010" is decoded as 0x0a and not as 0xa0
        $missing_bits = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing_bits) . $bits;

        // decode byte-wise: converting the whole number at once loses precision on long input
        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
441
442
    /**
     * Returns the UTF-8 Byte Order Mark.
     *
     * INFO: the private UTF8::$BOM table lists further BOM values, e.g. for UTF-16 and UTF-32
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The three bytes of the UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
458
459
    /**
     * @alias of UTF8::chr_map()
     *
     * Passes both arguments on to UTF8::chr_map() and returns its result.
     *
     * @param callable $callback <p>The callback handed over to UTF8::chr_map().</p>
     * @param string   $str      <p>The string handed over to UTF8::chr_map().</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
475
476
    /**
     * Returns the character found at $index; the first character has index 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
496
497
    /**
     * Splits the string into its characters, using UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $chars */
        $chars = self::str_split($str);

        return $chars;
    }
512
513
    /**
     * Detects once which UTF-8 related extensions / features are available and
     * stores the results in the internal support table.
     *
     * @return true|null
     *                   <p>true if the detection ran during this call, null if it had already been done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the detection runs only once
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
567
568
    /**
     * Builds the character that belongs to a code point, UTF-8 encoded unless
     * another target encoding is requested.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, null if the code point is not a positive integer.</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // results are memoized per "<code point>_<encoding>"
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (!\is_int($code_point) || $code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point <= 0x80) {
            // "simple" chars are taken straight from the lookup table
            // NOTE(review): 0x80 itself is included here; if the table maps an index to a
            // single byte (as the vanilla fallback below suggests), the result for 0x80
            // would not be valid UTF-8 - confirm whether "< 0x80" is intended.
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 byte sequence by hand
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            if ($code_point <= 0x7FF) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // the character was built as UTF-8, convert it if another encoding was requested
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
691
692
    /**
     * Runs the callback for every character of the string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
712
713
    /**
     * Lists the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // pick the byte counter first, then apply it to every character
        $byte_length = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
        : '\strlen';

        return \array_map($byte_length, self::str_split($str));
    }
748
749
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the character, 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // an empty string has no first byte: neither unpack() nor $char[0] below can handle it
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            // UCS-4LE stores the code point as a little-endian 32 bit integer
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        // manual decoding: the lead byte tells how many bytes the sequence has
        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
801
802
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Handed over to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the entity "&#0;" is looked up as an empty string
        $input = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($input), $prefix);
    }
827
828
    /**
     * Cuts a string into chunks of a given character length and joins them with the given separator.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) placed between two chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
846
847
    /**
848
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
849
     *
850
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
851
     *
852
     * @param string $str                                     <p>The string to be sanitized.</p>
853
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
854
     *                                                        UTF-BOM.</p>
855
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
856
     *                                                        whitespace.</p>
857
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
858
     *                                                        Word chars e.g.: "…"
859
     *                                                        => "..."</p>
860
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
861
     *                                                        in
862
     *                                                        combination with
863
     *                                                        $normalize_whitespace</p>
864
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
865
     *                                                        question mark e.g.: "�"</p>
866
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you not want to remove
867
     *                                                        invisible characters e.g.: "\0"</p>
868
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you not want to remove
869
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
870
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
871
     *                                                        </p>
872
     *
873
     * @psalm-pure
874
     *
875
     * @return string
876
     *                <p>A clean UTF-8 encoded string.</p>
877
     */
878 94
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // Capture group 1 collects runs of byte sequences that have the shape of
        // UTF-8 characters; groups 2 and 3 match single stray bytes. Replacing
        // every match with '$1' therefore keeps the runs and drops the stray bytes.
        //
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings
        $utf8_filter_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        $cleaned = (string) \preg_replace($utf8_filter_pattern, '$1', $str);

        // The optional post-processing steps run in this fixed order.
        $cleaned = $replace_diamond_question_mark
            ? self::replace_diamond_question_mark($cleaned)
            : $cleaned;

        $cleaned = $remove_invisible_characters
            ? self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded)
            : $cleaned;

        $cleaned = $normalize_whitespace
            ? self::normalize_whitespace($cleaned, $keep_non_breaking_space)
            : $cleaned;

        $cleaned = $normalize_msword
            ? self::normalize_msword($cleaned)
            : $cleaned;

        return $remove_bom
            ? self::remove_bom($cleaned)
            : $cleaned;
    }
926
927
    /**
     * Clean-up a string: repair simple encoding errors first, then hand the result to
     * UTF8::clean() with a fixed set of options.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string (the value is cast to string before use).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // Options forwarded to UTF8::clean(), named here for readability.
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        // Fix ISO <-> UTF-8 errors before the byte-level clean-up runs.
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
964
965
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.
     *                                     A string is split via UTF8::str_split() first.</p>
     * @param bool            $use_u_style <p>If true, every code point is additionally passed through
     *                                     UTF8::int_to_hex() (U+xxxx format); by default the values
     *                                     returned by UTF8::ord() are returned unchanged.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array if the input is neither a string nor an array, or is empty.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);

        if (!$use_u_style) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1026
1027
    /**
     * Replaces every run of whitespace (POSIX class [[:space:]]) with a single
     * space and trims the result.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        // Use mbstring's regex engine when it is available, the internal helper otherwise.
        $collapsed = self::$SUPPORT['mbstring'] === true
            ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
            : self::regex_replace($str, '[[:space:]]+', ' ');

        return \trim($collapsed);
    }
1047
1048
    /**
     * Returns count of characters used in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Forwarded unchanged to UTF8::str_split()
     *                                        as its fourth argument.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into chunks of length 1, then count how often each chunk occurs.
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
1077
1078
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" strings applied before invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>If true, the string is passed through strip_tags().</p>
     * @param bool     $strtolower <p>If true, the string is passed through strtolower().</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets a generated identifier, anything else is cleaned first.
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // We could also use strtr() here but its much slower than str_replace().
        // To keep '__' as '__' it is swapped for the placeholder '##' while the
        // filter runs, unless the caller defined a filter entry for '__'.
        $protected_double_underscores = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $protected_double_underscores);
        }

        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Restore the placeholder only if the input really contained '__'.
        if ($protected_double_underscores > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Characters kept by the expression below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters in the range U+00A1 - U+FFFF
        // Every other character is stripped.
        $identifier = (string) \preg_replace(
            '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u',
            '',
            $identifier
        );

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $identifier = (string) \preg_replace(
            ['/^[0-9]/', '/^(-[0-9])|^(--)/'],
            ['_', '__'],
            $identifier
        );

        return \trim($identifier, '-');
    }
1152
1153
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" section matched by the expression below is
     * replaced with an empty string; text outside of a match is left untouched.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input without the matched media-query sections.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
1170
1171
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_available = \extension_loaded('ctype');

        return $is_available;
    }
1185
1186
    /**
     * Converts an int value into a UTF-8 character.
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character for the code point. If the legacy conversion below
     *                does not yield a string, an empty string is returned.</p>
     */
    public static function decimal_to_chr($int): string
    {
        $entity_number = (string) $int;
        $code_point = (int) $int;

        // Fast path: a canonical decimal number that is a valid Unicode scalar
        // value (no surrogate, not above U+10FFFF) is converted with mb_chr().
        // This avoids the "HTML-ENTITIES" pseudo-encoding of mbstring, which is
        // deprecated as of PHP 8.2.
        if (
            (string) $code_point === $entity_number
            &&
            $code_point > 0
            &&
            $code_point <= 0x10FFFF
            &&
            ($code_point < 0xD800 || $code_point > 0xDFFF)
            &&
            \function_exists('mb_chr')
        ) {
            $char = \mb_chr($code_point, 'UTF-8');
            if (\is_string($char)) {
                return $char;
            }
        }

        // Legacy path for every other input, unchanged from the previous behavior.
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        $decoded = \mb_convert_encoding('&#' . $entity_number . ';', 'UTF-8', 'HTML-ENTITIES');

        // mb_convert_encoding() is not guaranteed to return a string; never let a
        // non-string value hit the declared return type.
        return \is_string($decoded) ? $decoded : '';
    }
1207
1208
    /**
     * Decodes a MIME header field via iconv_mime_decode(), continuing on errors
     * (ICONV_MIME_DECODE_CONTINUE_ON_ERROR).
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>The target charset; any value other than "UTF-8" or
     *                         "CP850" is passed through UTF8::normalize_encoding() first.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');

        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $decode_mode = \ICONV_MIME_DECODE_CONTINUE_ON_ERROR;

        return \iconv_mime_decode($str, $decode_mode, $encoding);
    }
1229
1230
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * Each letter is shifted from the ASCII range starting at "A" (0x41) to the
     * range starting at code point 0x1F1E6, and the two resulting characters are
     * concatenated.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE (case-insensitive)</p>
     *
     * @return string
     *                <p>Emoji or empty string on error, i.e. when the input is not
     *                exactly two ASCII letters.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Validate before doing any arithmetic: digits, punctuation or multibyte
        // input would otherwise be shifted into unrelated code points. This check
        // also covers the empty string and every length other than two.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        $emoji = '';
        foreach ([$country_code[0], $country_code[1]] as $letter) {
            // The letter is guaranteed to be a single ASCII byte at this point.
            $emoji .= self::chr(\ord($letter) - $asciiOffset + $flagOffset) ?? '';
        }

        return $emoji;
    }
1258
1259
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Load the emoji tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the key table that matches the mapping used while encoding.
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1302
1303
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Load the emoji tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the key table the emoji characters are replaced with.
        $replacement_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement_keys,
            $str
        );
    }
1346
1347
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the source encoding is always
     *                                              detected from the string and a detected value replaces
     *                                              $from_encoding; when false, detection only runs if
     *                                              $from_encoding is empty.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $to_encoding is "JSON" and the string cannot be JSON-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Nothing to do if source and target are known and identical.
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            $json_decoded = self::json_decode($str);
            // Only a decoded string can be processed further; for any other
            // result the input is kept as it is, so that no non-string value
            // reaches the code below (this method is declared to return string).
            if (\is_string($json_decoded)) {
                $str = $json_decoded;
            }
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $base64_decoded = \base64_decode($str, true);
            // In strict mode base64_decode() returns false for invalid input;
            // in that case the input is kept as it is instead of continuing
            // with a boolean.
            if ($base64_decoded !== false) {
                $str = $base64_decoded;
            }
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

        if ($from_encoding_auto_detected !== false) {
            // A detected encoding always wins over the given one.
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // Dedicated converters for the most common pairs.
        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        // Last resort: iconv; on failure the (possibly decoded) input is returned.
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        return $str;
    }
1522
1523
    /**
     * Encodes a string as a MIME header value via iconv_mime_encode(), using an
     * empty field name.
     *
     * @param string $str               <p>The value to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent            [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as given; every other name is normalized first.
        if (!($from_charset === 'UTF-8' || $from_charset === 'CP850')) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!($to_charset === 'UTF-8' || $to_charset === 'CP850')) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1566
1567
    /**
1568
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
1569
     *
1570
     * @param string   $str                       <p>The input string.</p>
1571
     * @param string   $search                    <p>The searched string.</p>
1572
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1573
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1574
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1575
     *
1576
     * @psalm-pure
1577
     *
1578
     * @return string
1579
     */
1580 1
    public static function extract_text(
1581
        string $str,
1582
        string $search = '',
1583
        int $length = null,
1584
        string $replacer_for_skipped_text = '…',
1585
        string $encoding = 'UTF-8'
1586
    ): string {
1587 1
        if ($str === '') {
1588 1
            return '';
1589
        }
1590
1591 1
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
1592
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
1593
        }
1594
1595 1
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
1596
1597 1
        if ($length === null) {
1598 1
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
1599
        }
1600
1601 1
        if ($search === '') {
1602 1
            if ($encoding === 'UTF-8') {
1603 1
                if ($length > 0) {
1604 1
                    $string_length = (int) \mb_strlen($str);
1605 1
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1606
                } else {
1607 1
                    $end = 0;
1608
                }
1609
1610 1
                $pos = (int) \min(
1611 1
                    \mb_strpos($str, ' ', $end),
1612 1
                    \mb_strpos($str, '.', $end)
1613
                );
1614
            } else {
1615
                if ($length > 0) {
1616
                    $string_length = (int) self::strlen($str, $encoding);
1617
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1618
                } else {
1619
                    $end = 0;
1620
                }
1621
1622
                $pos = (int) \min(
1623
                    self::strpos($str, ' ', $end, $encoding),
1624
                    self::strpos($str, '.', $end, $encoding)
1625
                );
1626
            }
1627
1628 1
            if ($pos) {
1629 1
                if ($encoding === 'UTF-8') {
1630 1
                    $str_sub = \mb_substr($str, 0, $pos);
1631
                } else {
1632
                    $str_sub = self::substr($str, 0, $pos, $encoding);
1633
                }
1634
1635 1
                if ($str_sub === false) {
1636
                    return '';
1637
                }
1638
1639 1
                return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1640
            }
1641
1642
            return $str;
1643
        }
1644
1645 1
        if ($encoding === 'UTF-8') {
1646 1
            $word_position = (int) \mb_stripos($str, $search);
1647 1
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
1648
        } else {
1649
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
1650
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
1651
        }
1652
1653 1
        $pos_start = 0;
1654 1
        if ($half_side > 0) {
1655 1
            if ($encoding === 'UTF-8') {
1656 1
                $half_text = \mb_substr($str, 0, $half_side);
1657
            } else {
1658
                $half_text = self::substr($str, 0, $half_side, $encoding);
1659
            }
1660 1
            if ($half_text !== false) {
1661 1
                if ($encoding === 'UTF-8') {
1662 1
                    $pos_start = (int) \max(
1663 1
                        \mb_strrpos($half_text, ' '),
1664 1
                        \mb_strrpos($half_text, '.')
1665
                    );
1666
                } else {
1667
                    $pos_start = (int) \max(
1668
                        self::strrpos($half_text, ' ', 0, $encoding),
1669
                        self::strrpos($half_text, '.', 0, $encoding)
1670
                    );
1671
                }
1672
            }
1673
        }
1674
1675 1
        if ($word_position && $half_side > 0) {
1676 1
            $offset = $pos_start + $length - 1;
1677 1
            $real_length = (int) self::strlen($str, $encoding);
1678
1679 1
            if ($offset > $real_length) {
1680
                $offset = $real_length;
1681
            }
1682
1683 1
            if ($encoding === 'UTF-8') {
1684 1
                $pos_end = (int) \min(
1685 1
                    \mb_strpos($str, ' ', $offset),
1686 1
                    \mb_strpos($str, '.', $offset)
1687 1
                ) - $pos_start;
1688
            } else {
1689
                $pos_end = (int) \min(
1690
                    self::strpos($str, ' ', $offset, $encoding),
1691
                    self::strpos($str, '.', $offset, $encoding)
1692
                ) - $pos_start;
1693
            }
1694
1695 1
            if (!$pos_end || $pos_end <= 0) {
1696 1
                if ($encoding === 'UTF-8') {
1697 1
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
1698
                } else {
1699
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
1700
                }
1701 1
                if ($str_sub !== false) {
1702 1
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
1703
                } else {
1704 1
                    $extract = '';
1705
                }
1706
            } else {
1707 1
                if ($encoding === 'UTF-8') {
1708 1
                    $str_sub = \mb_substr($str, $pos_start, $pos_end);
1709
                } else {
1710
                    $str_sub = self::substr($str, $pos_start, $pos_end, $encoding);
1711
                }
1712 1
                if ($str_sub !== false) {
1713 1
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1714
                } else {
1715 1
                    $extract = '';
1716
                }
1717
            }
1718
        } else {
1719 1
            $offset = $length - 1;
1720 1
            $true_length = (int) self::strlen($str, $encoding);
1721
1722 1
            if ($offset > $true_length) {
1723
                $offset = $true_length;
1724
            }
1725
1726 1
            if ($encoding === 'UTF-8') {
1727 1
                $pos_end = (int) \min(
1728 1
                    \mb_strpos($str, ' ', $offset),
1729 1
                    \mb_strpos($str, '.', $offset)
1730
                );
1731
            } else {
1732
                $pos_end = (int) \min(
1733
                    self::strpos($str, ' ', $offset, $encoding),
1734
                    self::strpos($str, '.', $offset, $encoding)
1735
                );
1736
            }
1737
1738 1
            if ($pos_end) {
1739 1
                if ($encoding === 'UTF-8') {
1740 1
                    $str_sub = \mb_substr($str, 0, $pos_end);
1741
                } else {
1742
                    $str_sub = self::substr($str, 0, $pos_end, $encoding);
1743
                }
1744 1
                if ($str_sub !== false) {
1745 1
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1746
                } else {
1747 1
                    $extract = '';
1748
                }
1749
            } else {
1750 1
                $extract = $str;
1751
            }
1752
        }
1753
1754 1
        return $extract;
1755
    }
1756
1757
    /**
     * Reads an entire file into a string and, on request, converts the content to UTF-8.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use the UTF-8 option ($convert_to_utf8) for binary files (e.g. images)!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. It is passed through
     *                                        Bootup::filter_sanitize_string_polyfill() before use.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded unchanged as the "use include path" flag of the
     *                                        native file_get_contents().
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A stream context created with stream_context_create().
     *                                        When it is null and $timeout is non-zero, a context with
     *                                        an "http" timeout is created here.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts; null means 0.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. Null reads until the end of
     *                                        the file, negative values are clamped to 0.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they use non default UTF-8 chars. Data that
     *                                        is detected as binary (and is neither UTF-16 nor UTF-32) is
     *                                        returned unconverted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        Forwarded to self::encode(); an empty string will trigger the
     *                                        autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // Only build a default context when the caller did not supply one.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        if ($max_length === null) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        } else {
            // A negative length is treated as "read nothing".
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, \max(0, $max_length));
        }

        // Read failures are reported as false; raw data is returned when no conversion is wanted.
        if ($data === false || !$convert_to_utf8) {
            return $data;
        }

        // Convert when the data is not binary, or when it is UTF-16 / UTF-32.
        $should_convert = !self::is_binary($data, true)
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;

        if ($should_convert) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1861
1862
    /**
     * Checks if a file starts with a BOM (Byte Order Mark) character.
     *
     * The whole file is read with the native file_get_contents() and the content
     * is handed to self::string_has_bom().
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1885
1886
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively; strings are normalized; every
     * other type is returned untouched.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var                <p>The value to filter.</p>
     * @param int                 $normalization_form <p>Form passed to the \Normalizer calls.</p>
     * @param string              $leading_combining  <p>Prefix added when the result starts with a
     *                                                combining mark (\p{Mn}).</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // Filter every element / public property in place.
            foreach ($var as &$element) {
                $element = self::filter($element, $normalization_form, $leading_combining);
            }
            unset($element);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // Pure ASCII needs neither normalization nor re-encoding.
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // Truthy placeholder: "already normalized", so the combining-mark check below still runs.
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // Fall back to re-encoding when the normalizer produced nothing usable.
                    $var = ($normalized && isset($normalized[0]))
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                if (
                    $normalized
                    &&
                    $var[0] >= "\x80"
                    &&
                    isset($normalized[0], $leading_combining[0])
                    &&
                    \preg_match('/^\\p{Mn}/u', $var)
                ) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1963
1964
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and passes
     * the result through self::filter().
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      Null is treated like an omitted argument.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options !== null && \func_num_args() >= 4) {
            // Only forward the options when the caller really supplied some.
            $value = \filter_input($type, $variable_name, $filter, $options);
        } else {
            $value = \filter_input($type, $variable_name, $filter);
        }

        return self::filter($value);
    }
2019
2020
    /**
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result
     * through self::filter().
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_UNSAFE_RAW)); // array('foo' => 'bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int                       $type       <p>
     *                                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                              <b>INPUT_ENV</b>.
     *                                              </p>
     * @param array<string, mixed>|null $definition [optional] <p>
     *                                              An array defining the arguments. A valid key is a string
     *                                              containing a variable name and a valid value is either a filter type, or an array
     *                                              optionally specifying the filter, flags and options. If the value is an
     *                                              array, valid keys are "filter" which specifies the filter type,
     *                                              "flags" which specifies any flags that apply to the
     *                                              filter, and "options" which specifies any options that
     *                                              apply to the filter.
     *                                              </p>
     *                                              <p>
     *                                              This parameter can be also an integer holding a filter constant. Then all values in the
     *                                              input array are filtered by this filter.
     *                                              Null is treated like an omitted argument.
     *                                              </p>
     * @param bool                      $add_empty  [optional] <p>
     *                                              Add missing keys as <b>NULL</b> to the return value.
     *                                              Only forwarded together with a non-null $definition.
     *                                              </p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>|false|null
     *                                         <p>
     *                                         An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *                                         An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *                                         set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *                                         is not set and <b>NULL</b> if the filter fails.
     *                                         </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition !== null && \func_num_args() >= 2) {
            // Only forward the definition when the caller really supplied one.
            $filtered = \filter_input_array($type, $definition, $add_empty);
        } else {
            $filtered = \filter_input_array($type);
        }

        /* @phpstan-ignore-next-line | magic frm self::filter :/ */
        return self::filter($filtered);
    }
2082
2083
    /**
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter and passes the result through
     * self::filter().
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        Null (explicit or by default) means "no options": the native
     *                                        filter_var() is then called without a third argument.
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        // other options here
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                        // Expected format: Surname, GivenNames
     *                                        if (strpos($value, ", ") === false) return false;
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
     *                                        $empty = (empty($surname) || empty($givennames));
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                        if ($empty || $notstrings) {
     *                                        return false;
     *                                        } else {
     *                                        return $value;
     *                                        }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The native filter_var() only accepts array|int options, so a null
        // value (default or passed explicitly) must never be forwarded.
        // This mirrors the null guard used by self::filter_input().
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        /* @phpstan-ignore-next-line | magic frm self::filter :/ */
        return self::filter($variable);
    }
2163
2164
    /**
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables, optionally filters them and passes the result
     * through self::filter().
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'user@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<string, mixed>          $data       <p>
     *                                                  An array with string keys containing the data to filter.
     *                                                  </p>
     * @param array<string, mixed>|int|null $definition [optional] <p>
     *                                                  An array defining the arguments. A valid key is a string
     *                                                  containing a variable name and a valid value is either a
     *                                                  filter type, or an
     *                                                  array optionally specifying the filter, flags and options.
     *                                                  If the value is an array, valid keys are "filter"
     *                                                  which specifies the filter type,
     *                                                  "flags" which specifies any flags that apply to the
     *                                                  filter, and "options" which specifies any options that
     *                                                  apply to the filter.
     *                                                  </p>
     *                                                  <p>
     *                                                  This parameter can be also an integer holding a filter constant. Then all values
     *                                                  in the input array are filtered by this filter.
     *                                                  Null (explicit or by default) means "no definition": the native
     *                                                  filter_var_array() is then called with $data only.
     *                                                  </p>
     * @param bool                          $add_empty  [optional] <p>
     *                                                  Add missing keys as <b>NULL</b> to the return value.
     *                                                  Only forwarded together with a non-null $definition.
     *                                                  </p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>|false|null
     *                                         <p>
     *                                         An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *                                         An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *                                         set.
     *                                         </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // The native filter_var_array() only accepts array|int as second
        // argument, so a null value (default or passed explicitly) must never
        // be forwarded. This mirrors the guard in self::filter_input_array().
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        /* @phpstan-ignore-next-line | magic frm self::filter :/ */
        return self::filter($a);
    }
2235
2236
    /**
     * Checks whether the "finfo" class is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2250
2251
    /**
     * Returns the first $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other encoding through self::substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2277
2278
    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
     *
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true, provided self::strlen() reports the 5 characters</code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2295
2296
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf' (assuming the sequence is part of the "utf8_fix" table)</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table is loaded once via self::getData('utf8_fix') and then
     * kept in static variables for later calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        // First call: build the search / replace lists from the shared table.
        if ($SEARCH_CACHE === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX ?: []);
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2346
2347
    /**
2348
     * Fix a double (or multiple) encoded UTF8 string.
2349
     *
2350
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
2351
     *
2352
     * @param string|string[] $str you can use a string or an array of strings
2353
     *
2354
     * @psalm-pure
2355
     *
2356
     * @return string|string[]
2357
     *                         <p>Will return the fixed input-"array" or
2358
     *                         the fixed input-"string".</p>
2359
     *
2360
     * @template TFixUtf8
2361
     * @phpstan-param TFixUtf8 $str
2362
     * @phpstan-return TFixUtf8
2363
     */
2364 2
    public static function fix_utf8($str)
    {
        // Arrays are fixed element by element (recursively), keeping their keys.
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Decode and re-encode until a pass no longer changes the string,
        // which unwinds any number of stacked encodings.
        while ($current !== $previous) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2395
2396
    /**
2397
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2398
     *
2399
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2400
     *
2401
     * @param string $char
2402
     *
2403
     * @psalm-pure
2404
     *
2405
     * @return string
2406
     *                <p>'RTL' or 'LTR'.</p>
2407
     */
2408 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // any other result falls through to the manual lookup below
        }

        $c = static::chr_to_decimal($char);

        // Everything outside of this window is treated as left-to-right.
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        // Right-to-left code points: a plain integer is a single code point
        // (compared strictly), a pair is an inclusive [first, last] range.
        $rtl_code_points = [
            // 0x5be - 0x85e
            0x5be,
            0x5c0,
            0x5c3,
            0x5c6,
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            0x608,
            0x60b,
            0x60d,
            0x61b,
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            0x710,
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            0x7b1,
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            0x7fa,
            [0x800, 0x815],
            0x81a,
            0x824,
            0x828,
            [0x830, 0x83e],
            [0x840, 0x858],
            0x85e,
            // single code point between the two blocks
            0x200f,
            // 0xfb1d - 0x10b7f
            0xfb1d,
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            0xfb3e,
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            0x10808,
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            0x1083c,
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            0x1093f,
            0x10a00,
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            // open-ended in effect: the window check above caps it at 0x10b7f
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_code_points as $entry) {
            if (\is_int($entry)) {
                if ($c === $entry) {
                    return 'RTL';
                }

                continue;
            }

            if ($c >= $entry[0] && $c <= $entry[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2513
2514
    /**
2515
     * Check for php-support.
2516
     *
2517
     * @param string|null $key
2518
     *
2519
     * @psalm-pure
2520
     *
2521
     * @return mixed
2522
     *               Return the full support-"array", if $key === null<br>
2523
     *               return bool-value, if $key is used and available<br>
2524
     *               otherwise return <strong>null</strong>
2525
     */
2526 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Load the transliterator list on first use ...
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // ... and expose it under the legacy key (compatibility fix for old versions).
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // Unknown (or null-valued) keys yield null.
            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        // No key given: hand back the complete support-"array".
        return self::$SUPPORT;
    }
2540
2541
    /**
2542
     * Warning: this method only works for some file-types (png, jpg)
2543
     *          if you need more supported types, please use e.g. "finfo"
2544
     *
2545
     * @param string                                                        $str
2546
     * @param array{ext: null|string, mime: null|string, type: null|string} $fallback
2547
     *
2548
     * @return array{ext: null|string, mime: null|string, type: null|string}
2549
     *
2550
     * @psalm-pure
2551
     */
2552 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // Only the first two bytes are inspected.
        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \substr($str, 0, 2);
        if ($head === false || \strlen($head) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $head);
        if ($bytes === false) {
            return $fallback;
        }

        // The decimal values of both bytes are glued together and read as one
        // integer, e.g. 0xFF 0xD8 => "255" . "216" => 255216.
        $signature = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_signatures = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        if (!isset($known_signatures[$signature])) {
            return $fallback;
        }

        return $known_signatures[$signature];
    }
2614
2615
    /**
2616
     * @param int    $length         <p>Length of the random string.</p>
2617
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2618
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2619
     *
2620
     * @return string
2621
     */
2622 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The literal 'UTF-8' takes the direct "mb_*" route; every other value is
        // normalized first and handled by the class' own strlen()/substr().
        $use_mb_directly = $encoding === 'UTF-8';
        if (!$use_mb_directly) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $use_mb_directly
            ? (int) \mb_strlen($possible_chars)
            : (int) self::strlen($possible_chars, $encoding);

        // Without any candidate characters there is nothing to pick from.
        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        for ($picked = 0; $picked < $length;) {
            try {
                $offset = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                // fall back to mt_rand() when random_int() throws
                $offset = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mb_directly
                ? \mb_substr($possible_chars, $offset, 1)
                : self::substr($possible_chars, $offset, 1, $encoding);

            // A failed extraction does not count; the position is drawn again.
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2677
2678
    /**
2679
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2680
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2681
     *
2682
     * @return string
2683
     */
2684 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // fall back to mt_rand() when random_int() throws
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        // Mix the random number with session, request and caller supplied data.
        $seed = $random_part
            . \session_id()
            . ($_SERVER['REMOTE_ADDR'] ?? '')
            . ($_SERVER['SERVER_ADDR'] ?? '')
            . $extra_entropy;

        $unique = \uniqid($seed, true);

        // Optionally condense the result into an md5-hash.
        return $use_md5 ? \md5($unique . $seed) : $unique;
    }
2706
2707
    /**
2708
     * Returns true if the string contains a lower case char, false otherwise.
2709
     *
2710
     * @param string $str <p>The input string.</p>
2711
     *
2712
     * @psalm-pure
2713
     *
2714
     * @return bool
2715
     *              <p>Whether or not the string contains a lower case character.</p>
2716
     */
2717 47
    public static function has_lowercase(string $str): bool
    {
        // anything, followed by at least one lower case character
        $pattern = '.*[[:lower:]]';

        // Without mbstring the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2725
2726
    /**
2727
     * Returns true if the string contains whitespace, false otherwise.
2728
     *
2729
     * @param string $str <p>The input string.</p>
2730
     *
2731
     * @psalm-pure
2732
     *
2733
     * @return bool
2734
     *              <p>Whether or not the string contains whitespace.</p>
2735
     */
2736 11
    public static function has_whitespace(string $str): bool
    {
        // anything, followed by at least one whitespace character
        $pattern = '.*[[:space:]]';

        // Without mbstring the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2744
2745
    /**
2746
     * Returns true if the string contains an upper case char, false otherwise.
2747
     *
2748
     * @param string $str <p>The input string.</p>
2749
     *
2750
     * @psalm-pure
2751
     *
2752
     * @return bool
2753
     *              <p>Whether or not the string contains an upper case character.</p>
2754
     */
2755 12
    public static function has_uppercase(string $str): bool
    {
        // anything, followed by at least one upper case character
        $pattern = '.*[[:upper:]]';

        // Without mbstring the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2763
2764
    /**
2765
     * Converts a hexadecimal value into a UTF-8 character.
2766
     *
2767
     * INFO: opposite to UTF8::chr_to_hex()
2768
     *
2769
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2770
     *
2771
     * @param string $hexdec <p>The hexadecimal value.</p>
2772
     *
2773
     * @psalm-pure
2774
     *
2775
     * @return false|string one single UTF-8 character
2776
     */
2777 4
    public static function hex_to_chr(string $hexdec)
    {
        // hexdec() complains about non-hex characters (e.g. the "U+" prefix) but
        // still ignores them, hence the silence operator.
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2782
2783
    /**
2784
     * Converts hexadecimal U+xxxx code point representation to integer.
2785
     *
2786
     * INFO: opposite to UTF8::int_to_hex()
2787
     *
2788
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2789
     *
2790
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2791
     *
2792
     * @psalm-pure
2793
     *
2794
     * @return false|int
2795
     *                   <p>The code point, or false on failure.</p>
2796
     */
2797 2
    public static function hex_to_int($hexdec)
    {
        // The parameter is untyped for backward compatibility, so normalize it first.
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Accept an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
        // Only real hex digits are allowed: letters beyond "f" would otherwise pass the
        // check and be silently truncated by intval() ("zzzz" => 0, "12zz" => 18)
        // instead of being reported as a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2812
2813
    /**
2814
     * Converts a UTF-8 string to a series of HTML numbered entities.
2815
     *
2816
     * INFO: opposite to UTF8::html_decode()
2817
     *
2818
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2819
     *
2820
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2821
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2822
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2823
     *
2824
     * @psalm-pure
2825
     *
2826
     * @return string HTML numbered entities
2827
     */
2828 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Conversion map: with ASCII kept, encoding starts at 0x80, otherwise at 0x00.
            $convmap = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // First attempt without an explicit encoding argument.
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $convmap);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        return \implode('', \array_map($encode_single_char, self::str_split($str)));
    }
2885
2886
    /**
2887
     * UTF-8 version of html_entity_decode()
2888
     *
2889
     * The reason we are not using html_entity_decode() by itself is because
2890
     * while it is not technically correct to leave out the semicolon
2891
     * at the end of an entity most browsers will still interpret the entity
2892
     * correctly. html_entity_decode() does not convert entities without
2893
     * semicolons, so we are left with our own little solution here. Bummer.
2894
     *
2895
     * Convert all HTML entities to their applicable characters.
2896
     *
2897
     * INFO: opposite to UTF8::html_encode()
2898
     *
2899
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2900
     *
2901
     * @see http://php.net/manual/en/function.html-entity-decode.php
2902
     *
2903
     * @param string   $str      <p>
2904
     *                           The input string.
2905
     *                           </p>
2906
     * @param int|null $flags    [optional] <p>
2907
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2908
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2909
     *                           <table>
2910
     *                           Available <i>flags</i> constants
2911
     *                           <tr valign="top">
2912
     *                           <td>Constant Name</td>
2913
     *                           <td>Description</td>
2914
     *                           </tr>
2915
     *                           <tr valign="top">
2916
     *                           <td><b>ENT_COMPAT</b></td>
2917
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2918
     *                           </tr>
2919
     *                           <tr valign="top">
2920
     *                           <td><b>ENT_QUOTES</b></td>
2921
     *                           <td>Will convert both double and single quotes.</td>
2922
     *                           </tr>
2923
     *                           <tr valign="top">
2924
     *                           <td><b>ENT_NOQUOTES</b></td>
2925
     *                           <td>Will leave both double and single quotes unconverted.</td>
2926
     *                           </tr>
2927
     *                           <tr valign="top">
2928
     *                           <td><b>ENT_HTML401</b></td>
2929
     *                           <td>
2930
     *                           Handle code as HTML 4.01.
2931
     *                           </td>
2932
     *                           </tr>
2933
     *                           <tr valign="top">
2934
     *                           <td><b>ENT_XML1</b></td>
2935
     *                           <td>
2936
     *                           Handle code as XML 1.
2937
     *                           </td>
2938
     *                           </tr>
2939
     *                           <tr valign="top">
2940
     *                           <td><b>ENT_XHTML</b></td>
2941
     *                           <td>
2942
     *                           Handle code as XHTML.
2943
     *                           </td>
2944
     *                           </tr>
2945
     *                           <tr valign="top">
2946
     *                           <td><b>ENT_HTML5</b></td>
2947
     *                           <td>
2948
     *                           Handle code as HTML 5.
2949
     *                           </td>
2950
     *                           </tr>
2951
     *                           </table>
2952
     *                           </p>
2953
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2954
     *
2955
     * @psalm-pure
2956
     *
2957
     * @return string the decoded string
2958
     */
2959 34
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to decode without a "&" or with fewer than four bytes
        // (examples: &; || &x;).
        if (\strpos($str, '&') === false || !isset($str[3])) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // For every other encoding a missing mbstring extension is reported.
        $is_plain_encoding = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
        if (!$is_plain_encoding && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly, so that nested entities (e.g. "&amp;lt;") are
        // resolved too; stop once a pass changes nothing or no "&" is left.
        while (\strpos($str, '&') !== false) {
            $before_pass = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // append the ";" that html_entity_decode() insists on
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($before_pass === $str) {
                break;
            }
        }

        return $str;
    }
3018
3019
    /**
3020
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
3021
     *
3022
     * @param string $str
3023
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3024
     *
3025
     * @psalm-pure
3026
     *
3027
     * @return string
3028
     */
3029 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles and substitute invalid code unit sequences.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3037
3038
    /**
3039
     * Remove empty html-tag.
3040
     *
3041
     * e.g.: <pre><tag></tag></pre>
3042
     *
3043
     * @param string $str
3044
     *
3045
     * @psalm-pure
3046
     *
3047
     * @return string
3048
     */
3049 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag, optional whitespace and a closing tag - nothing else in between.
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        // preg_replace() yields null on failure, which the cast turns into ''.
        return (string) $stripped;
    }
3057
3058
    /**
3059
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3060
     *
3061
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3062
     *
3063
     * @see http://php.net/manual/en/function.htmlentities.php
3064
     *
3065
     * @param string $str           <p>
3066
     *                              The input string.
3067
     *                              </p>
3068
     * @param int    $flags         [optional] <p>
3069
     *                              A bitmask of one or more of the following flags, which specify how to handle
3070
     *                              quotes, invalid code unit sequences and the used document type. The default is
3071
     *                              ENT_COMPAT | ENT_HTML401.
3072
     *                              <table>
3073
     *                              Available <i>flags</i> constants
3074
     *                              <tr valign="top">
3075
     *                              <td>Constant Name</td>
3076
     *                              <td>Description</td>
3077
     *                              </tr>
3078
     *                              <tr valign="top">
3079
     *                              <td><b>ENT_COMPAT</b></td>
3080
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3081
     *                              </tr>
3082
     *                              <tr valign="top">
3083
     *                              <td><b>ENT_QUOTES</b></td>
3084
     *                              <td>Will convert both double and single quotes.</td>
3085
     *                              </tr>
3086
     *                              <tr valign="top">
3087
     *                              <td><b>ENT_NOQUOTES</b></td>
3088
     *                              <td>Will leave both double and single quotes unconverted.</td>
3089
     *                              </tr>
3090
     *                              <tr valign="top">
3091
     *                              <td><b>ENT_IGNORE</b></td>
3092
     *                              <td>
3093
     *                              Silently discard invalid code unit sequences instead of returning
3094
     *                              an empty string. Using this flag is discouraged as it
3095
     *                              may have security implications.
3096
     *                              </td>
3097
     *                              </tr>
3098
     *                              <tr valign="top">
3099
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3100
     *                              <td>
3101
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3102
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3103
     *                              string.
3104
     *                              </td>
3105
     *                              </tr>
3106
     *                              <tr valign="top">
3107
     *                              <td><b>ENT_DISALLOWED</b></td>
3108
     *                              <td>
3109
     *                              Replace invalid code points for the given document type with a
3110
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3111
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3112
     *                              instance, to ensure the well-formedness of XML documents with
3113
     *                              embedded external content.
3114
     *                              </td>
3115
     *                              </tr>
3116
     *                              <tr valign="top">
3117
     *                              <td><b>ENT_HTML401</b></td>
3118
     *                              <td>
3119
     *                              Handle code as HTML 4.01.
3120
     *                              </td>
3121
     *                              </tr>
3122
     *                              <tr valign="top">
3123
     *                              <td><b>ENT_XML1</b></td>
3124
     *                              <td>
3125
     *                              Handle code as XML 1.
3126
     *                              </td>
3127
     *                              </tr>
3128
     *                              <tr valign="top">
3129
     *                              <td><b>ENT_XHTML</b></td>
3130
     *                              <td>
3131
     *                              Handle code as XHTML.
3132
     *                              </td>
3133
     *                              </tr>
3134
     *                              <tr valign="top">
3135
     *                              <td><b>ENT_HTML5</b></td>
3136
     *                              <td>
3137
     *                              Handle code as HTML 5.
3138
     *                              </td>
3139
     *                              </tr>
3140
     *                              </table>
3141
     *                              </p>
3142
     * @param string $encoding      [optional] <p>
3143
     *                              Like <b>htmlspecialchars</b>,
3144
     *                              <b>htmlentities</b> takes an optional third argument
3145
     *                              <i>encoding</i> which defines encoding used in
3146
     *                              conversion.
3147
     *                              Although this argument is technically optional, you are highly
3148
     *                              encouraged to specify the correct value for your code.
3149
     *                              </p>
3150
     * @param bool   $double_encode [optional] <p>
3151
     *                              When <i>double_encode</i> is turned off PHP will not
3152
     *                              encode existing html entities. The default is to convert everything.
3153
     *                              </p>
3154
     *
3155
     * @psalm-pure
3156
     *
3157
     * @return string
3158
     *                <p>
3159
     *                The encoded string.
3160
     *                <br><br>
3161
     *                If the input <i>string</i> contains an invalid code unit
3162
     *                sequence within the given <i>encoding</i> an empty string
3163
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3164
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3165
     *                </p>
3166
     */
3167 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // Only encodings other than the literal 'UTF-8' / 'CP850' are passed
        // through normalize_encoding() (with 'UTF-8' as its second argument).
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // The native htmlentities() leaves backslashes untouched, so every
        // backslash is replaced with its numeric html entity "&#92;" here.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // Final pass through html_encode() using the (possibly normalized) encoding.
        return self::html_encode($encoded, true, $encoding);
    }
3196
3197
    /**
3198
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3199
     *
3200
     * INFO: Take a look at "UTF8::htmlentities()"
3201
     *
3202
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3203
     *
3204
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3205
     *
3206
     * @param string $str           <p>
3207
     *                              The string being converted.
3208
     *                              </p>
3209
     * @param int    $flags         [optional] <p>
3210
     *                              A bitmask of one or more of the following flags, which specify how to handle
3211
     *                              quotes, invalid code unit sequences and the used document type. The default is
3212
     *                              ENT_COMPAT | ENT_HTML401.
3213
     *                              <table>
3214
     *                              Available <i>flags</i> constants
3215
     *                              <tr valign="top">
3216
     *                              <td>Constant Name</td>
3217
     *                              <td>Description</td>
3218
     *                              </tr>
3219
     *                              <tr valign="top">
3220
     *                              <td><b>ENT_COMPAT</b></td>
3221
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3222
     *                              </tr>
3223
     *                              <tr valign="top">
3224
     *                              <td><b>ENT_QUOTES</b></td>
3225
     *                              <td>Will convert both double and single quotes.</td>
3226
     *                              </tr>
3227
     *                              <tr valign="top">
3228
     *                              <td><b>ENT_NOQUOTES</b></td>
3229
     *                              <td>Will leave both double and single quotes unconverted.</td>
3230
     *                              </tr>
3231
     *                              <tr valign="top">
3232
     *                              <td><b>ENT_IGNORE</b></td>
3233
     *                              <td>
3234
     *                              Silently discard invalid code unit sequences instead of returning
3235
     *                              an empty string. Using this flag is discouraged as it
3236
     *                              may have security implications.
3237
     *                              </td>
3238
     *                              </tr>
3239
     *                              <tr valign="top">
3240
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3241
     *                              <td>
3242
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3243
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3244
     *                              string.
3245
     *                              </td>
3246
     *                              </tr>
3247
     *                              <tr valign="top">
3248
     *                              <td><b>ENT_DISALLOWED</b></td>
3249
     *                              <td>
3250
     *                              Replace invalid code points for the given document type with a
3251
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3252
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3253
     *                              instance, to ensure the well-formedness of XML documents with
3254
     *                              embedded external content.
3255
     *                              </td>
3256
     *                              </tr>
3257
     *                              <tr valign="top">
3258
     *                              <td><b>ENT_HTML401</b></td>
3259
     *                              <td>
3260
     *                              Handle code as HTML 4.01.
3261
     *                              </td>
3262
     *                              </tr>
3263
     *                              <tr valign="top">
3264
     *                              <td><b>ENT_XML1</b></td>
3265
     *                              <td>
3266
     *                              Handle code as XML 1.
3267
     *                              </td>
3268
     *                              </tr>
3269
     *                              <tr valign="top">
3270
     *                              <td><b>ENT_XHTML</b></td>
3271
     *                              <td>
3272
     *                              Handle code as XHTML.
3273
     *                              </td>
3274
     *                              </tr>
3275
     *                              <tr valign="top">
3276
     *                              <td><b>ENT_HTML5</b></td>
3277
     *                              <td>
3278
     *                              Handle code as HTML 5.
3279
     *                              </td>
3280
     *                              </tr>
3281
     *                              </table>
3282
     *                              </p>
3283
     * @param string $encoding      [optional] <p>
3284
     *                              Defines encoding used in conversion.
3285
     *                              </p>
3286
     *                              <p>
3287
     *                              For the purposes of this function, the encodings
3288
     *                              ISO-8859-1, ISO-8859-15,
3289
     *                              UTF-8, cp866,
3290
     *                              cp1251, cp1252, and
3291
     *                              KOI8-R are effectively equivalent, provided the
3292
     *                              <i>string</i> itself is valid for the encoding, as
3293
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3294
     *                              the same positions in all of these encodings.
3295
     *                              </p>
3296
     * @param bool   $double_encode [optional] <p>
3297
     *                              When <i>double_encode</i> is turned off PHP will not
3298
     *                              encode existing html entities, the default is to convert everything.
3299
     *                              </p>
3300
     *
3301
     * @psalm-pure
3302
     *
3303
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
3310
     */
3311 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // The literal names 'UTF-8' and 'CP850' are used as given; every other
        // encoding name goes through normalize_encoding() first.
        $use_as_given = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$use_as_given) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Delegate the actual conversion to the native function.
        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3328
3329
    /**
3330
     * Checks whether iconv is available on the server.
3331
     *
3332
     * @psalm-pure
3333
     *
3334
     * @return bool
3335
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3336
     *
3337
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3338
     */
3339
    public static function iconv_loaded(): bool
    {
        // Ask the PHP runtime whether the "iconv" extension is loaded.
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3343
3344
    /**
3345
     * Converts Integer to hexadecimal U+xxxx code point representation.
3346
     *
3347
     * INFO: opposite to UTF8::hex_to_int()
3348
     *
3349
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3350
     *
3351
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3352
     * @param string $prefix [optional]
3353
     *
3354
     * @psalm-pure
3355
     *
3356
     * @return string the prefixed hexadecimal code point, zero-padded to at least four digits
3357
     */
3358 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros so that at least four hex digits are emitted;
        // values that already have four or more digits are left untouched.
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $digits;
    }
3366
3367
    /**
3368
     * Checks whether intl-char is available on the server.
3369
     *
3370
     * @psalm-pure
3371
     *
3372
     * @return bool
3373
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3374
     *
3375
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3376
     */
3377
    public static function intlChar_loaded(): bool
    {
        // Availability is detected by the presence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3381
3382
    /**
3383
     * Checks whether intl is available on the server.
3384
     *
3385
     * @psalm-pure
3386
     *
3387
     * @return bool
3388
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3389
     *
3390
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3391
     */
3392 5
    public static function intl_loaded(): bool
    {
        // Ask the PHP runtime whether the "intl" extension is loaded.
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3396
3397
    /**
3398
     * Returns true if the string contains only alphabetic chars, false otherwise.
3399
     *
3400
     * @param string $str <p>The input string.</p>
3401
     *
3402
     * @psalm-pure
3403
     *
3404
     * @return bool
3405
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3406
     */
3407 10
    public static function is_alpha(string $str): bool
    {
        // An empty string also matches because of the "*" quantifier.
        $pattern = '^[[:alpha:]]*$';

        // Without the mbstring support flag, delegate to str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3415
3416
    /**
3417
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3418
     *
3419
     * @param string $str <p>The input string.</p>
3420
     *
3421
     * @psalm-pure
3422
     *
3423
     * @return bool
3424
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3425
     */
3426 13
    public static function is_alphanumeric(string $str): bool
    {
        // An empty string also matches because of the "*" quantifier.
        $pattern = '^[[:alnum:]]*$';

        // Without the mbstring support flag, delegate to str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3434
3435
    /**
3436
     * Returns true if the string contains only punctuation chars, false otherwise.
3437
     *
3438
     * @param string $str <p>The input string.</p>
3439
     *
3440
     * @psalm-pure
3441
     *
3442
     * @return bool
3443
     *              <p>Whether or not $str contains only punctuation chars.</p>
3444
     */
3445 10
    public static function is_punctuation(string $str): bool
    {
        // An empty string also matches because of the "*" quantifier.
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3449
3450
    /**
3451
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3452
     *
3453
     * @param string $str                       <p>The input string.</p>
3454
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3455
     *
3456
     * @psalm-pure
3457
     *
3458
     * @return bool
3459
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3460
     */
3461 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // Strip via remove_invisible_characters(); the string counts as
        // printable only when that call leaves it completely unchanged.
        $stripped = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $stripped === $str;
    }
3465
3466
    /**
3467
     * Checks if a string is 7 bit ASCII.
3468
     *
3469
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3470
     *
3471
     * @param string $str <p>The string to check.</p>
3472
     *
3473
     * @psalm-pure
3474
     *
3475
     * @return bool
3476
     *              <p>
3477
     *              <strong>true</strong> if it is ASCII<br>
3478
     *              <strong>false</strong> otherwise
3479
     *              </p>
3480
     */
3481 8
    public static function is_ascii(string $str): bool
    {
        // Thin wrapper: the check itself lives in ASCII::is_ascii().
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3485
3486
    /**
3487
     * Returns true if the string is base64 encoded, false otherwise.
3488
     *
3489
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3490
     *
3491
     * @param string|null $str                   <p>The input string.</p>
3492
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3493
     *
3494
     * @psalm-pure
3495
     *
3496
     * @return bool
3497
     *              <p>Whether or not $str is base64 encoded.</p>
3498
     */
3499 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) can never be base64.
        if (!\is_string($str)) {
            return false;
        }

        // The empty string is accepted only when the caller opted in.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round-trip: re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3517
3518
    /**
3519
     * Check if the input is binary... (this looks like a hack).
3520
     *
3521
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3522
     *
3523
     * @param int|string $input
3524
     * @param bool       $strict
3525
     *
3526
     * @psalm-pure
3527
     *
3528
     * @return bool
3529
     */
3530 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made up solely of "0" and "1" characters counts as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if (!$strict) {
            // Heuristic: more than 25% NUL bytes means binary.
            $byte_length = \strlen($input);
            $nul_bytes = \substr_count($input, "\x0", 0, $byte_length);

            return ($nul_bytes / $byte_length) > 0.25;
        }

        // Strict mode relies on ext-fileinfo.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detected_encoding === 'binary';
    }
3570
3571
    /**
3572
     * Check if the file is binary.
3573
     *
3574
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3575
     *
3576
     * @param string $file
3577
     *
3578
     * @return bool
3579
     */
3580 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened: treated as "not binary".
            return false;
        }

        // Only the first 512 bytes are inspected.
        $chunk = \fread($handle, 512);
        \fclose($handle);

        if ($chunk === false || $chunk === '') {
            return false;
        }

        return self::is_binary($chunk, true);
    }
3597
3598
    /**
3599
     * Returns true if the string contains only whitespace chars, false otherwise.
3600
     *
3601
     * @param string $str <p>The input string.</p>
3602
     *
3603
     * @psalm-pure
3604
     *
3605
     * @return bool
3606
     *              <p>Whether or not $str contains only whitespace characters.</p>
3607
     */
3608 15
    public static function is_blank(string $str): bool
    {
        // An empty string also matches because of the "*" quantifier.
        $pattern = '^[[:space:]]*$';

        // Without the mbstring support flag, delegate to str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3616
3617
    /**
3618
     * Checks if the given string is equal to any "Byte Order Mark".
3619
     *
3620
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3621
     *
3622
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3623
     *
3624
     * @param string $str <p>The input string.</p>
3625
     *
3626
     * @psalm-pure
3627
     *
3628
     * @return bool
3629
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3630
     */
3631 2
    public static function is_bom($str): bool
    {
        // The candidate BOM byte sequences are the KEYS of self::$BOM; the
        // values are not needed for this check. Iterating the static array by
        // reference (as the previous implementation did) is unnecessary for a
        // read-only lookup, so the keys are compared directly instead.
        //
        // The comparison is strict, so $str must be identical to one of the
        // keys as a whole; a string that merely starts with a BOM does not
        // match, and neither does a non-string value.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3642
3643
    /**
3644
     * Determine whether the string is considered to be empty.
3645
     *
3646
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3647
     * empty() does not generate a warning if the variable does not exist.
3648
     *
3649
     * @param array|float|int|string $str
3650
     *
3651
     * @psalm-pure
3652
     *
3653
     * @return bool
3654
     *              <p>Whether or not $str is empty().</p>
3655
     */
3656 1
    public static function is_empty($str): bool
    {
        // For an always-defined parameter, empty() is the same as a negated
        // boolean cast: '', '0', 0, 0.0, [], null and false are all "empty".
        return !$str;
    }
3660
3661
    /**
3662
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3663
     *
3664
     * @param string $str <p>The input string.</p>
3665
     *
3666
     * @psalm-pure
3667
     *
3668
     * @return bool
3669
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3670
     */
3671 13
    public static function is_hexadecimal(string $str): bool
    {
        // An empty string also matches because of the "*" quantifier.
        $pattern = '^[[:xdigit:]]*$';

        // Without the mbstring support flag, delegate to str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3679
3680
    /**
3681
     * Check if the string contains any HTML tags.
3682
     *
3683
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3684
     *
3685
     * @param string $str <p>The input string.</p>
3686
     *
3687
     * @psalm-pure
3688
     *
3689
     * @return bool
3690
     *              <p>Whether or not $str contains html elements.</p>
3691
     */
3692 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Opening, closing or self-closing tag, optionally with attributes.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // preg_match() yields 1 only on a hit (0 = no match, false = error).
        return \preg_match($tag_pattern, $encoded) === 1;
    }
3707
3708
    /**
3709
     * Check if $url is a correct url.
3710
     *
3711
     * @param string $url
3712
     * @param bool   $disallow_localhost
3713
     *
3714
     * @psalm-pure
3715
     *
3716
     * @return bool
3717
     */
3718 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        // (only "http://" and "https://" urls are ever accepted)
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals are written in square brackets inside a url,
                    // so the loopback address must be matched in this form too;
                    // otherwise "http://[::1]/" falls through to filter_var()
                    // below and is accepted.
                    // NOTE(review): other spellings of loopback addresses
                    // (e.g. the expanded IPv6 form) are still not covered here.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // reject anything containing ".localhost" (e.g. "https://foo.localhost/")
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // fallback: let PHP's own url validation decide
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3759
3760
    /**
3761
     * Try to check if "$str" is a JSON-string.
3762
     *
3763
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3764
     *
3765
     * @param string $str                                    <p>The input string.</p>
3766
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3767
     *                                                       results.</p>
3768
     *
3769
     * @return bool
3770
     *              <p>Whether or not the $str is in JSON format.</p>
3771
     */
3772 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only tolerated when the input itself spells "null"
        // (compared case-insensitively); any other null result means failure.
        $input_spells_null = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$input_spells_null) {
            return false;
        }

        // Optionally reject scalar results (strings, numbers, booleans, null).
        $is_container = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid && !$is_container) {
            return false;
        }

        // Final verdict: the json error state must be clean.
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3799
3800
    /**
3801
     * @param string $str <p>The input string.</p>
3802
     *
3803
     * @psalm-pure
3804
     *
3805
     * @return bool
3806
     *              <p>Whether or not $str contains only lowercase chars.</p>
3807
     */
3808 8
    public static function is_lowercase(string $str): bool
    {
        // An empty string also matches because of the "*" quantifier.
        $pattern = '^[[:lower:]]*$';

        // Without the mbstring support flag, delegate to str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3816
3817
    /**
3818
     * Returns true if the string is serialized, false otherwise.
3819
     *
3820
     * @param string $str <p>The input string.</p>
3821
     *
3822
     * @psalm-pure
3823
     *
3824
     * @return bool
3825
     *              <p>Whether or not $str is serialized.</p>
3826
     */
3827 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // unserialize() returns false both on failure and for a serialized
        // boolean false, so that one valid payload is recognized explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: this is only a format check, so no object may be
        // instantiated while probing the string. With "allowed_classes" set
        // to false, serialized objects are turned into __PHP_Incomplete_Class
        // instead of real instances; the result is still non-false, so valid
        // payloads containing objects are reported as serialized as before.
        //
        // The silence operator hides the notice that unserialize() emits for
        // input that is not serialized data.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3839
3840
    /**
3841
     * Returns true if the string contains only upper case chars, false
3842
     * otherwise.
3843
     *
3844
     * @param string $str <p>The input string.</p>
3845
     *
3846
     * @psalm-pure
3847
     *
3848
     * @return bool
3849
     *              <p>Whether or not $str contains only upper case characters.</p>
3850
     */
3851 8
    public static function is_uppercase(string $str): bool
    {
        // An empty string also matches because of the "*" quantifier.
        $pattern = '^[[:upper:]]*$';

        // Without the mbstring support flag, delegate to str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3859
3860
    /**
3861
     * Check if the string is UTF-16.
3862
     *
3863
     * EXAMPLE: <code>
3864
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
3865
     * //
3866
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
3867
     * //
3868
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
3869
     * </code>
3870
     *
3871
     * @param string $str                       <p>The input string.</p>
3872
     * @param bool   $check_if_string_is_binary
3873
     *
3874
     * @psalm-pure
3875
     *
3876
     * @return false|int
3877
     *                   <strong>false</strong> if it's not UTF-16,<br>
3878
     *                   <strong>1</strong> for UTF-16LE,<br>
3879
     *                   <strong>2</strong> for UTF-16BE
3880
     */
3881 21
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
3882
    {
3883
        // init
3884 21
        $str = (string) $str;
3885 21
        $str_chars = [];
3886
3887
        // fix for the "binary"-check
3888 21
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
3889 2
            $check_if_string_is_binary = false;
3890
        }
3891
3892
        if (
3893 21
            $check_if_string_is_binary
3894
            &&
3895 21
            !self::is_binary($str, true)
3896
        ) {
3897 2
            return false;
3898
        }
3899
3900 21
        if (self::$SUPPORT['mbstring'] === false) {
3901
            /**
3902
             * @psalm-suppress ImpureFunctionCall - is is only a warning
3903
             */
3904 3
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
3905
        }
3906
3907 21
        $str = self::remove_bom($str);
3908
3909 21
        $maybe_utf16le = 0;
3910 21
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
3911 21
        if ($test) {
3912 21
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
3913 21
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
3914 21
            if ($test3 === $test) {
3915
                /**
3916
                 * @psalm-suppress RedundantCondition
3917
                 */
3918 21
                if ($str_chars === []) {
3919 21
                    $str_chars = self::count_chars($str, true, false);
3920
                }
3921 21
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
0 ignored issues
show
Bug introduced by
It seems like $test3 can also be of type array; however, parameter $str of voku\helper\UTF8::count_chars() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

3921
                foreach (self::count_chars(/** @scrutinizer ignore-type */ $test3) as $test3char => &$test3charEmpty) {
Loading history...
Bug introduced by
The expression self::count_chars($test3) cannot be used as a reference.

Let?s assume that you have the following foreach statement:

foreach ($array as &$itemValue) { }

$itemValue is assigned by reference. This is possible because the expression (in the example $array) can be used as a reference target.

However, if we were to replace $array with something different like the result of a function call as in

foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.

Available Fixes

1. Do not assign by reference
foreach (getArray() as $itemValue) { }
2. Assign to a local variable first
$array = getArray();
foreach ($array as &$itemValue) {}
3. Return a reference
function &getArray() { $array = array(); return $array; }

foreach (getArray() as &$itemValue) { }
Loading history...
3922 21
                    if (\in_array($test3char, $str_chars, true)) {
3923 5
                        ++$maybe_utf16le;
3924
                    }
3925
                }
3926 21
                unset($test3charEmpty);
3927
            }
3928
        }
3929
3930 21
        $maybe_utf16be = 0;
3931 21
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
3932 21
        if ($test) {
3933 21
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
3934 21
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
3935 21
            if ($test3 === $test) {
3936 21
                if ($str_chars === []) {
3937 11
                    $str_chars = self::count_chars($str, true, false);
3938
                }
3939 21
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
0 ignored issues
show
Bug introduced by
The expression self::count_chars($test3) cannot be used as a reference.

Let's assume that you have the following foreach statement:

foreach ($array as &$itemValue) { }

$itemValue is assigned by reference. This is possible because the expression (in the example $array) can be used as a reference target.

However, if we were to replace $array with something different like the result of a function call as in

foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.

Available Fixes

1. Do not assign by reference
foreach (getArray() as $itemValue) { }
2. Assign to a local variable first
$array = getArray();
foreach ($array as &$itemValue) {}
3. Return a reference
function &getArray() { $array = array(); return $array; }

foreach (getArray() as &$itemValue) { }
Loading history...
3940 21
                    if (\in_array($test3char, $str_chars, true)) {
3941 6
                        ++$maybe_utf16be;
3942
                    }
3943
                }
3944 21
                unset($test3charEmpty);
3945
            }
3946
        }
3947
3948 21
        if ($maybe_utf16be !== $maybe_utf16le) {
3949 7
            if ($maybe_utf16le > $maybe_utf16be) {
3950 5
                return 1;
3951
            }
3952
3953 6
            return 2;
3954
        }
3955
3956 17
        return false;
3957
    }
3958
3959
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * The detection is a heuristic: the input is decoded once as UTF-32LE and once as
     * UTF-32BE, every decoding that survives a round trip is scored, and the byte order
     * with the higher score wins. Equal scores (including "both zero") yield false.
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>If true, input without a BOM must pass self::is_binary()
     *                                          first, otherwise false is returned immediately.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: input that carries a BOM skips the binary test
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per candidate byte order, both computed by the same steps
        $scores = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $byte_order) {
            $scores[$byte_order] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!\is_string($test) || !$test) {
                continue;
            }

            // round trip: UTF-8 -> candidate -> UTF-8 must reproduce the first decoding
            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            if (!\is_string($test2)) {
                continue;
            }

            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            /**
             * @psalm-suppress RedundantCondition
             */
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // NOTE(review): each decoded character (array key) is looked up among the *values* of
            // $str_chars; if count_chars() returns "char => count" the keys were probably intended.
            // The comparison is kept as-is so the detection result does not change.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$byte_order];
                }
            }
        }

        if ($scores['UTF-32BE'] !== $scores['UTF-32LE']) {
            return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
        }

        return false;
    }
4057
4058
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * Arrays are checked element by element (recursively); the first failing element
     * makes the whole result false. Any other input is cast to string and handed to
     * self::is_utf8_string().
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // iterate by value: the elements are only read, so no reference is needed
            // (and none can be left dangling by the early return)
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4088
4089
    /**
     * Decodes a JSON string.
     *
     * The input is passed through self::filter() first and is then handed to PHP's
     * native \json_decode(); a depth below 1 is raised to 1.
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.</p>
     * @param int    $depth   [optional] <p>User specified recursion depth (minimum 1).</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, forwarded unchanged to
     *                        \json_decode().</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return mixed
     *               <p>The result of \json_decode(): the decoded value in the appropriate PHP type, or
     *               <b>NULL</b> if the <i>json</i> cannot be decoded or is nested deeper than the
     *               recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // clamp the recursion depth to at least one level
        return \json_decode($filtered_json, $assoc, \max(1, $depth), $options);
    }
4146
4147
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through self::filter() first and is then handed to PHP's
     * native \json_encode(); a depth below 1 is raised to 1.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded.</p>
     * @param int   $options [optional] <p>Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged to \json_encode().</p>
     * @param int   $depth   [optional] <p>The maximum depth (minimum 1).</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // clamp the nesting depth to at least one level
        return \json_encode($filtered_value, $options, \max(1, $depth));
    }
4203
4204
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is determined by the existence of the json_decode() function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4218
4219
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * For UTF-8 without a language and without length preservation the native mb_* functions
     * are used directly; every other combination goes through self::strtolower().
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // non UTF-8 input: always use the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // plain case: no language specific rules and no length constraint
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $tail;
        }

        $head = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $head . $tail;
    }
4281
4282
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via self::str_to_words(); every non-empty part that is not listed
     * in $exceptions gets its first character lowercased via self::lcfirst(), and the parts
     * are concatenated again in their original order.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare against '' explicitly: a truthiness check would also discard the string "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip only real empty parts; a part consisting of "0" must stay in the output
            if ((string) $word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // excluded words are copied unchanged
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4335
4336
    /**
     * Calculate Levenshtein distance between two strings.
     *
     * Both strings are remapped with ASCII::to_ascii_remap() and the remapped pair is then
     * compared with PHP's native \levenshtein().
     *
     * For better performance, in a real application with a single input string
     * matched against many strings from a database, you will probably want to pre-
     * encode the input only once and use \levenshtein().
     *
     * Source: https://github.com/KEINOS/mb_levenshtein
     *
     * @see https://www.php.net/manual/en/function.levenshtein
     *
     * @param string $str1            <p>One of the strings being evaluated for Levenshtein distance.</p>
     * @param string $str2            <p>One of the strings being evaluated for Levenshtein distance.</p>
     * @param int    $insertionCost   [optional] <p>Defines the cost of insertion.</p>
     * @param int    $replacementCost [optional] <p>Defines the cost of replacement.</p>
     * @param int    $deletionCost    [optional] <p>Defines the cost of deletion.</p>
     *
     * @return int
     */
    public static function levenshtein(
        string $str1,
        string $str2,
        int $insertionCost = 1,
        int $replacementCost = 1,
        int $deletionCost = 1
    ): int {
        list($remapped1, $remapped2) = ASCII::to_ascii_remap($str1, $str2);

        return \levenshtein($remapped1, $remapped2, $insertionCost, $replacementCost, $deletionCost);
    }
4366
4367
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace, an empty
     *                           string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list would produce the invalid character class "[]" and the failed
        // replacement would be cast to ''. There is nothing to strip, so return the input as is.
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // only the preg based fallback has a delimiter that needs quoting
            /** @noinspection PregQuoteUsageInspection */
            $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4406
4407
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param string|string[] $arg <p>A UTF-8 encoded string or an array of such strings (the array
     *                             elements are concatenated before the lookup).</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4433
4434
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte length of the given chars, 0 if self::chr_size_list() yields nothing.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        return $byte_sizes === [] ? 0 : (int) \max($byte_sizes);
    }
4456
4457
    /**
     * Checks whether mbstring is available on the server.
     *
     * Availability is determined by asking PHP whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4471
4472
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings (the array
     *                             elements are concatenated before the lookup).</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null if no code points were found.</p>
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4499
4500
    /**
     * Normalize the encoding-"name" input.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * Lookup order: empty input, a handful of frequent names, the per-request cache, the
     * list of known encodings and finally an alias table that is matched against the
     * upper-cased name with all non-alphanumeric characters removed. Names that match
     * nothing are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        if (!$encoding) {
            return $fallback;
        }

        // frequent names are answered without touching the cache or the encoding list
        $frequent_names = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($frequent_names[$encoding])) {
            return $frequent_names[$encoding];
        }

        // only a fallback, for non "strict_types" usage ...
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // the list of known encodings is loaded on first use
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $upper_name = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upper_name);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5',
            'ISO88596'    => 'ISO-8859-6', // Greek
            'ISO88597'    => 'ISO-8859-7',
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown aliases keep the upper-cased name
        $normalized = $aliases[$alias_key] ?? $upper_name;

        // cache under the name exactly as it was passed in
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4665
4666
    /**
     * Standardize line endings (unix-like by default).
     *
     * Every "\r\n", "\r" and "\n" in the input is replaced by $replacer.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc\nd", "\r\n"); // "a\r\nb\r\nc\r\nd"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. An array is handed to "str_replace()" unchanged, so its elements are
     *                                  used (in order) as replacements for "\r\n", "\r" and "\n".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // keep the native array semantics of "str_replace()" for array replacers
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // "strtr()" with a map works in a single pass, prefers the longest key and never
        // re-scans text it has already inserted. A sequential "str_replace()" would replace
        // the "\r" and "\n" of a replacer like "\r\n" again in its later passes.
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4682
4683
    /**
     * Normalize some special characters that are commonly produced by MS Word.
     *
     * This is a thin wrapper: the work is delegated to ASCII::normalize_msword().
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4699
4700
    /**
     * Normalize the whitespace.
     *
     * This is a thin wrapper: all arguments are forwarded unchanged to ASCII::normalize_whitespace().
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
4729
4730
    /**
     * Calculates the Unicode code point of the given UTF-8 encoded character.
     *
     * The lookup order is: per-request cache, the "ord" data table (self::$ORD),
     * "IntlChar::ord()" (if available) and finally a manual decoding of the first bytes.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 e.g. for an empty input (upstream documents: 0 on invalid UTF-8 byte sequence)</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache is keyed by the raw input plus the (normalized) encoding
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $code = \IntlChar::ord($chr);
            // "IntlChar::ord()" returns int|null: compare strictly, so that a valid
            // code point 0 is not mistaken for "no result"
            if ($code !== null) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php
        //

        /** @var false|int[] $bytes - "unpack": only false if the format string contains errors */
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        if (!$bytes) {
            // nothing to decode (e.g. empty input)
            return $CHAR_CACHE[$cache_key] = 0;
        }

        // "unpack('C*', ...)" returns a 1-based array of byte values
        $code = $bytes[1];

        // 4-byte sequence
        if ($code >= 0xF0 && isset($bytes[4])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // 3-byte sequence
        if ($code >= 0xE0 && isset($bytes[3])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // 2-byte sequence
        if ($code >= 0xC0 && isset($bytes[2])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // single byte (or a truncated multi-byte sequence): use the first byte as it is
        return $CHAR_CACHE[$cache_key] = $code;
    }
4815
4816
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string               $str        <p>The input string.</p>
     * @param array<string, mixed> $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool                 $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        return $parsed !== false && $result !== [];
    }
4857
4858
    /**
     * Checks whether the "u" pattern modifier (Unicode support in PCRE) is available.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string;
     * warnings of "preg_match()" are suppressed on purpose.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        // "preg_match()" yields 1 (match), 0 (no match) or false (error)
        return $match_result === 1;
    }
4874
4875
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both boundaries are resolved to code points first (decimal digits, hexadecimal digits,
     * a numeric value or a character via UTF8::ord()), then every code point of
     * "\range($start, $end, $step)" is converted via UTF8::chr().
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is true, but "ext-ctype" is not available
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The characters of the range, or an empty array if a boundary is empty / resolves to 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype functions must always see strings: an integer argument would be
        // interpreted as an ASCII code (and is deprecated since PHP 8.1).
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        $is_digit = $use_ctype
                    && \ctype_digit($var1_string)
                    && \ctype_digit($var2_string);

        $is_xdigit = !$is_digit
                     && $use_ctype
                     && \ctype_xdigit($var1_string)
                     && \ctype_xdigit($var2_string);

        // resolve the first boundary
        if ($is_digit) {
            $start = (int) $var1;
        } elseif ($is_xdigit) {
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // resolve the second boundary the same way as the first one
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4968
4969
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * Each decoding pass runs the string through UTF8::to_utf8(), UTF8::html_entity_decode()
     * and "\rawurldecode()". With $multi_decode the pass is repeated until the string
     * does not change anymore, otherwise it runs exactly once.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // one pass is always executed; further passes only in "multi decode" mode
        // and only as long as the last pass still changed something
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
5029
5030
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped into the delimiter and always gets the "u" modifier,
     * followed by the given options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used. The option string 'msr' is
     *                            mapped to 'ms'.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // fallback: an "empty" delimiter ('' or '0') is replaced by the default one
        $effective_delimiter = ($delimiter === '' || $delimiter === '0') ? '/' : $delimiter;

        $modifiers = 'u' . ($options === 'msr' ? 'ms' : $options);

        $regex = $effective_delimiter . $pattern . $effective_delimiter . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5065
5066
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM bytes => byte length) is checked in order
     * against the beginning of the (remaining) string and cut off on a match.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);
        foreach (self::$BOM as $bom => $bom_length) {
            if (\strncmp($str, $bom, $bom_length) !== 0) {
                // the string does not start with this BOM
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $bom_length, $remaining_bytes);
            if ($stripped === false) {
                return '';
            }

            $remaining_bytes -= $bom_length;

            $str = (string) $stripped;
        }

        return $str;
    }
5101
5102
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated occurrences of a search string is collapsed
     * into one single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                // Re-insert the captured literal via '$1' instead of using $item as the
                // replacement template: "preg_replace()" would interpret "$0", "\1" or "\\"
                // inside of $item as references / escapes and corrupt the result.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
5132
5133
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (strip all tags)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $plain_text = \strip_tags($str, $allowable_tags);

        return $plain_text;
    }
5150
5151
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * NOTE: the tag part of the pattern ("<br.*>", case-insensitive, ungreedy) matches
     *       everything from "<br" up to the next ">", not only real "<br>" tags.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" has to be the first alternative, so that a windows line break is replaced
        // once (and not once for "\r" and once for "\n"). The pattern must not start with
        // a "/": "#" is the delimiter here, so a "/" would be matched literally.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
5166
5167
    /**
     * Remove invisible characters from a string.
     *
     * This is a thin wrapper: all arguments are forwarded unchanged to ASCII::remove_invisible_characters().
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy & paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        $visible_only = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $visible_only;
    }
5204
5205
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_left('κόσμε-foo', 'κόσμε'); // '-foo'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare with '' explicitly: a loose truthiness check would also treat
        // the prefix "0" as empty and never remove it.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5246
5247
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_right('foo-κόσμε', 'κόσμε'); // 'foo-'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare with '' explicitly: a loose truthiness check would also treat
        // the suffix "0" as empty and never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5285
5286
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * The case-sensitive mode uses "\str_replace()", the case-insensitive mode
     * is delegated to UTF8::str_ireplace().
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5311
5312
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * The case-sensitive mode uses "\str_replace()", the case-insensitive mode
     * is delegated to UTF8::str_ireplace().
     *
     * @param string          $str            <p>The input string.</p>
     * @param string[]        $search         <p>The elements to search for.</p>
     * @param string|string[] $replacement    <p>The string to replace with.</p>
     * @param bool            $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5337
5338
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if ($replacement_char === '') {
                $replacement_char_helper = 'none';
            } else {
                // "mb_substitute_character()" expects a Unicode code point. The byte-wise
                // "\ord()" would only deliver the first byte of a multi-byte replacement
                // char (e.g. 0xC2 for "¿") and therefore a wrong substitute character.
                $replacement_char_helper = self::ord($replacement_char);
                if ($replacement_char_helper <= 0 || $replacement_char_helper > 0x10FFFF) {
                    // no usable code point (e.g. broken input): fall back to the first byte
                    $replacement_char_helper = \ord($replacement_char);
                }
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character($replacement_char_helper);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global substitute character, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        // remove / replace the remaining U+FFFD characters (escaped and literal notation)
        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5397
5398
    /**
5399
     * Strip whitespace or other characters from the end of a UTF-8 string.
5400
     *
5401
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
5402
     *
5403
     * @param string      $str   <p>The string to be trimmed.</p>
5404
     * @param string|null $chars <p>Optional characters to be stripped.</p>
5405
     *
5406
     * @psalm-pure
5407
     *
5408
     * @return string
5409
     *                <p>A string with unwanted characters stripped from the right.</p>
5410
     */
5411 21
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            // no explicit character list: strip trailing whitespace
            $pattern = '[\\s]+$';
        } else {
            // only the PCRE based fallback needs the "/" delimiter to be quoted as well
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = "[{$quoted_chars}]+$";
        }

        if ($has_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5438
5439
    /**
5440
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
5441
     *
5442
     * @param bool $useEcho
5443
     *
5444
     * @psalm-pure
5445
     *
5446
     * @return string
5447
     */
5448 2
    public static function showSupport(bool $useEcho = true)
    {
        // one "<key> - <value>" line per entry of the support map, wrapped in <pre>
        $html = '<pre>';

        // Iterate by value: nothing is modified here, and a by-reference loop would
        // turn the entries of the static array into references for no benefit.
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        // the markup is returned in any case, even if it was already printed
        return $html;
    }
5465
5466
    /**
5467
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
5468
     *
5469
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
5470
     *
5471
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
5472
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
5473
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
5474
     *
5475
     * @psalm-pure
5476
     *
5477
     * @return string
5478
     *                <p>The HTML numbered entity for the given character.</p>
5479
     */
5480 2
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to encode
        if ($char === '') {
            return '';
        }

        // on request, ASCII input is handed back untouched
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5499
5500
    /**
5501
     * @param string $str
5502
     * @param int    $tab_length
5503
     *
5504
     * @psalm-pure
5505
     *
5506
     * @return string
5507
     */
5508 5
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // every run of $tab_length spaces is collapsed into a single tab
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5520
5521
    /**
5522
     * Returns a camelCase version of the string. Trims surrounding spaces,
5523
     * capitalizes letters following digits, spaces, dashes and underscores,
5524
     * and removes spaces, dashes, as well as underscores.
5525
     *
5526
     * @param string      $str                           <p>The input string.</p>
5527
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
5528
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5529
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5530
     *                                                   tr</p>
5531
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
5532
     *                                                   -> ß</p>
5533
     *
5534
     * @psalm-pure
5535
     *
5536
     * @return string
5537
     */
5538 32
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the native "mb_strtoupper()" is used as long as neither a language
        // nor the "keep the string length" mode was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case helper shared by both callbacks below.
         *
         * @param string $text
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($text)
                : \mb_strtoupper($text, $encoding);
        };

        // lower-case the first character and drop leading dashes / underscores
        $camel = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );
        $camel = (string) \preg_replace('/^[-_]+/', '', $camel);

        // remove the separators and upper-case the character that follows them (if any)
        $camel = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $camel
        );

        // upper-case whatever directly follows a run of numbers
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $camel
        );
    }
5614
5615
    /**
5616
     * Returns the string with the first letter of each word capitalized,
5617
     * except for when the word is a name which shouldn't be capitalized.
5618
     *
5619
     * @param string $str
5620
     *
5621
     * @psalm-pure
5622
     *
5623
     * @return string
5624
     *                <p>A string with $str capitalized.</p>
5625
     */
5626 1
    public static function str_capitalize_name(string $str): string
    {
        // first helper pass: whitespace-collapsed input with ' ' as delimiter
        $first_pass = self::str_capitalize_name_helper(
            self::collapse_whitespace($str),
            ' '
        );

        // second helper pass: result of the first pass with '-' as delimiter
        return self::str_capitalize_name_helper($first_pass, '-');
    }
5636
5637
    /**
5638
     * Returns true if the string contains $needle, false otherwise. By default
5639
     * the comparison is case-sensitive, but can be made insensitive by setting
5640
     * $case_sensitive to false.
5641
     *
5642
     * @param string $haystack       <p>The input string.</p>
5643
     * @param string $needle         <p>Substring to look for.</p>
5644
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5645
     *
5646
     * @psalm-pure
5647
     *
5648
     * @return bool
5649
     *              <p>Whether or not $haystack contains $needle.</p>
5650
     */
5651 21
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // case-insensitive lookups always go through mbstring
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // before PHP 8 there is no native "str_contains()"
        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5667
5668
    /**
5669
     * Returns true if the string contains all $needles, false otherwise. By
5670
     * default the comparison is case-sensitive, but can be made insensitive by
5671
     * setting $case_sensitive to false.
5672
     *
5673
     * @param string   $haystack       <p>The input string.</p>
5674
     * @param scalar[] $needles        <p>SubStrings to look for.</p>
5675
     * @param bool     $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5676
     *
5677
     * @psalm-pure
5678
     *
5679
     * @return bool
5680
     *              <p>Whether or not $haystack contains $needle.</p>
5681
     */
5682 45
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // iterate by value: the needles are only read
        foreach ($needles as $needle) {
            // Compare against '' instead of relying on truthiness, otherwise the
            // perfectly valid needle "0" (or int 0) would be treated as missing.
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            // exactly one lookup per needle, depending on the requested mode
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5705
5706
    /**
5707
     * Returns true if the string contains any $needles, false otherwise. By
5708
     * default the comparison is case-sensitive, but can be made insensitive by
5709
     * setting $case_sensitive to false.
5710
     *
5711
     * @param string   $haystack       <p>The input string.</p>
5712
     * @param scalar[] $needles        <p>SubStrings to look for.</p>
5713
     * @param bool     $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5714
     *
5715
     * @psalm-pure
5716
     *
5717
     * @return bool
5718
     *              <p>Whether or not $str contains $needle.</p>
5719
     */
5720 46
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // iterate by value: the needles are only read
        foreach ($needles as $needle) {
            // Only really empty needles are skipped; a truthiness check would also
            // skip the valid needle "0" (or int 0).
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5749
5750
    /**
5751
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
5752
     * inserted before uppercase characters (with the exception of the first
5753
     * character of the string), and in place of spaces as well as underscores.
5754
     *
5755
     * @param string $str      <p>The input string.</p>
5756
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5757
     *
5758
     * @psalm-pure
5759
     *
5760
     * @return string
5761
     */
5762 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // dasherizing is delimiting with a dash as delimiter
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5766
5767
    /**
5768
     * Returns a lowercase and trimmed string separated by the given delimiter.
5769
     * Delimiters are inserted before uppercase characters (with the exception
5770
     * of the first character of the string), and in place of spaces, dashes,
5771
     * and underscores. Alpha delimiters are not converted to lowercase.
5772
     *
5773
     * @param string      $str                           <p>The input string.</p>
5774
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
5775
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
5776
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5777
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5778
     *                                                   tr</p>
5779
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
5780
     *                                                   ß</p>
5781
     *
5782
     * @psalm-pure
5783
     *
5784
     * @return string
5785
     */
5786 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // native mbstring uses the "mb_ereg" functions, otherwise PCRE is used
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: put a dash in front of every upper-case letter matched after "\B"
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lower-case the string
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: every run of dashes, underscores and whitespace becomes the delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5818
5819
    /**
5820
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
5821
     *
5822
     * EXAMPLE: <code>
5823
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
5824
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
5825
     * </code>
5826
     *
5827
     * @param string $str <p>The input string.</p>
5828
     *
5829
     * @psalm-pure
5830
     *
5831
     * @return false|string
5832
     *                      <p>
5833
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
5834
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
5835
     *                      </p>
5836
     */
5837 30
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, !self::string_has_bom($str))) {
            // 1 => little endian, 2 => big endian
            $utf32 = self::is_utf32($str, false);
            if ($utf32 === 1 || $utf32 === 2) {
                return $utf32 === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16 = self::is_utf16($str, false);
            if ($utf16 === 1 || $utf16 === 2) {
                return $utf16 === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding(
                $str,
                [
                    'ISO-8859-1',
                    'ISO-8859-2',
                    'ISO-8859-3',
                    'ISO-8859-4',
                    'ISO-8859-5',
                    'ISO-8859-6',
                    'ISO-8859-7',
                    'ISO-8859-8',
                    'ISO-8859-9',
                    'ISO-8859-10',
                    'ISO-8859-13',
                    'ISO-8859-14',
                    'ISO-8859-15',
                    'ISO-8859-16',
                    'WINDOWS-1251',
                    'WINDOWS-1252',
                    'WINDOWS-1254',
                    'CP932',
                    'CP936',
                    'CP950',
                    'CP866',
                    'CP850',
                    'CP51932',
                    'CP50220',
                    'CP50221',
                    'CP50222',
                    'ISO-2022-JP',
                    'ISO-2022-KR',
                    'JIS',
                    'JIS-ms',
                    'EUC-CN',
                    'EUC-JP',
                ],
                true
            );
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        // the list of known encodings is loaded on first use
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // a candidate is accepted if converting it to itself leaves the string unchanged
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5949
5950
    /**
5951
     * Check if the string ends with the given substring.
5952
     *
5953
     * EXAMPLE: <code>
5954
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
5955
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
5956
     * </code>
5957
     *
5958
     * @param string $haystack <p>The string to search in.</p>
5959
     * @param string $needle   <p>The substring to search for.</p>
5960
     *
5961
     * @psalm-pure
5962
     *
5963
     * @return bool
5964
     */
5965 9
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but the empty string does not end with anything else
        if ($haystack === '') {
            return false;
        }

        // fallback for PHP < 8: compare the trailing bytes by hand
        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5982
5983
    /**
5984
     * Returns true if the string ends with any of $substrings, false otherwise.
5985
     *
5986
     * - case-sensitive
5987
     *
5988
     * @param string   $str        <p>The input string.</p>
5989
     * @param string[] $substrings <p>Substrings to look for.</p>
5990
     *
5991
     * @psalm-pure
5992
     *
5993
     * @return bool
5994
     *              <p>Whether or not $str ends with $substring.</p>
5995
     */
5996 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // iterate by value: the substrings are only read
        foreach ($substrings as $substring) {
            // An empty substring has to be handled explicitly: "-\strlen('')" is 0, so
            // \substr() would return the whole string and the comparison would fail,
            // although every string ends with '' (see "str_ends_with()").
            if (
                $substring === ''
                ||
                \substr($str, -\strlen($substring)) === $substring
            ) {
                return true;
            }
        }

        return false;
    }
6010
6011
    /**
6012
     * Ensures that the string begins with $substring. If it doesn't, it's
6013
     * prepended.
6014
     *
6015
     * @param string $str       <p>The input string.</p>
6016
     * @param string $substring <p>The substring to add if not present.</p>
6017
     *
6018
     * @psalm-pure
6019
     *
6020
     * @return string
6021
     */
6022 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        $prefix_length = \strlen($substring);

        // binary safe "starts with" check, an empty substring never counts as present
        $already_prefixed = $prefix_length > 0
                            &&
                            \strncmp($str, $substring, $prefix_length) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
6034
6035
    /**
6036
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
6037
     *
6038
     * @param string $str       <p>The input string.</p>
6039
     * @param string $substring <p>The substring to add if not present.</p>
6040
     *
6041
     * @psalm-pure
6042
     *
6043
     * @return string
6044
     */
6045 10
    public static function str_ensure_right(string $str, string $substring): string
    {
        // the suffix only counts as present if neither string is empty
        // and the trailing bytes of $str are identical to $substring
        $already_suffixed = $str !== ''
                            &&
                            $substring !== ''
                            &&
                            \substr($str, -\strlen($substring)) === $substring;

        return $already_suffixed ? $str : $str . $substring;
    }
6059
6060
    /**
6061
     * Capitalizes the first word of the string, replaces underscores with
6062
     * spaces, and strips '_id'.
6063
     *
6064
     * @param string $str
6065
     *
6066
     * @psalm-pure
6067
     *
6068
     * @return string
6069
     */
6070 3
    public static function str_humanize($str): string
    {
        // "_id" has to come before "_", otherwise it would end up as " id"
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $str = \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        return self::ucfirst(\trim($str));
    }
6086
6087
    /**
6088
     * Check if the string ends with the given substring, case-insensitive.
6089
     *
6090
     * EXAMPLE: <code>
6091
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6092
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6093
     * </code>
6094
     *
6095
     * @param string $haystack <p>The string to search in.</p>
6096
     * @param string $needle   <p>The substring to search for.</p>
6097
     *
6098
     * @psalm-pure
6099
     *
6100
     * @return bool
6101
     */
6102 12
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but the empty string does not end with anything else
        if ($haystack === '') {
            return false;
        }

        // take as many trailing bytes from the haystack as the needle is long
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6114
6115
    /**
6116
     * Returns true if the string ends with any of $substrings, false otherwise.
6117
     *
6118
     * - case-insensitive
6119
     *
6120
     * @param string   $str        <p>The input string.</p>
6121
     * @param string[] $substrings <p>Substrings to look for.</p>
6122
     *
6123
     * @psalm-pure
6124
     *
6125
     * @return bool
6126
     *              <p>Whether or not $str ends with $substring.</p>
6127
     */
6128 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply skips the loop and ends up at "return false"
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6142
6143
    /**
6144
     * Inserts $substring into the string at the $index provided.
6145
     *
6146
     * @param string $str       <p>The input string.</p>
6147
     * @param string $substring <p>String to be inserted.</p>
6148
     * @param int    $index     <p>The index at which to insert the substring.</p>
6149
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6150
     *
6151
     * @psalm-pure
6152
     *
6153
     * @return string
6154
     */
6155 8
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // the literal "UTF-8" takes the direct "mb_*" path, everything else
        // is normalized and handled by the wrappers of this class
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // an index behind the end of the string leaves the string untouched
        if ($index > $length) {
            return $str;
        }

        if ($is_utf8) {
            /** @noinspection UnnecessaryCastingInspection */
            $head = (string) \mb_substr($str, 0, $index);
            /** @noinspection UnnecessaryCastingInspection */
            $tail = (string) \mb_substr($str, $index, $length);
        } else {
            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);
        }

        return $head . $substring . $tail;
    }
6184
6185
    /**
6186
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6187
     *
6188
     * EXAMPLE: <code>
6189
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6190
     * </code>
6191
     *
6192
     * @see http://php.net/manual/en/function.str-ireplace.php
6193
     *
6194
     * @param string|string[] $search      <p>
6195
     *                                     Every replacement with search array is
6196
     *                                     performed on the result of previous replacement.
6197
     *                                     </p>
6198
     * @param string|string[] $replacement <p>The replacement.</p>
6199
     * @param string|string[] $subject     <p>
6200
     *                                     If subject is an array, then the search and
6201
     *                                     replace is performed with every entry of
6202
     *                                     subject, and the return value is an array as
6203
     *                                     well.
6204
     *                                     </p>
6205
     * @param int             $count       [optional] <p>
6206
     *                                     The number of matched and replaced needles will
6207
     *                                     be returned in count which is passed by
6208
     *                                     reference.
6209
     *                                     </p>
6210
     *
6211
     * @psalm-pure
6212
     *
6213
     * @return string|string[]
6214
     *                         <p>A string or an array of replacements.</p>
6215
     *
6216
     * @template TStrIReplaceSubject
6217
     * @phpstan-param TStrIReplaceSubject $subject
6218
     * @phpstan-return TStrIReplaceSubject
6219
     */
6220 29
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Turn every search term into a case-insensitive, unicode aware pattern.
        // (built into a new array, so no by-reference loop variable is left behind)
        $patterns = [];
        foreach ((array) $search as $key => $term) {
            $term = (string) $term;
            $patterns[$key] = $term === ''
                ? '/^(?<=.)$/' // an empty search term must never match anything
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // "preg_replace()" expands "$n" and "\n" in the replacement as back-references,
        // "str_ireplace()" inserts them literally: escape "\" and "$" to keep them literal.
        $escape_replacement = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replacement = \is_array($replacement)
            ? \array_map($escape_replacement, $replacement)
            : $escape_replacement($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6252
6253
    /**
6254
     * Replaces $search from the beginning of string with $replacement.
6255
     *
6256
     * @param string $str         <p>The input string.</p>
6257
     * @param string $search      <p>The string to search for.</p>
6258
     * @param string $replacement <p>The replacement.</p>
6259
     *
6260
     * @psalm-pure
6261
     *
6262
     * @return string
6263
     *                <p>The string after the replacement.</p>
6264
     */
6265 17
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Existing contract: an empty needle appends the replacement to the input
        // (this also covers the "everything empty" case).
        if ($search === '') {
            return $str . $replacement;
        }

        // A non-empty needle can never be the prefix of an empty string.
        if ($str === '') {
            return '';
        }

        // Fast path: byte-wise comparison, which folds ASCII letters only.
        $search_bytes = \strlen($search);
        if (\strncasecmp($str, $search, $search_bytes) === 0) {
            return $replacement . \substr($str, $search_bytes);
        }

        // Unicode path: compare the first N characters (N = needle length in code
        // points) with multibyte case folding, so e.g. "Κόσμε" matches "κόσμε".
        $prefix = (string) \mb_substr($str, 0, (int) \mb_strlen($search, 'UTF-8'), 'UTF-8');
        if (
            $prefix !== ''
            &&
            \strncmp($str, $prefix, \strlen($prefix)) === 0
            &&
            \mb_stripos($prefix, $search, 0, 'UTF-8') === 0
        ) {
            // Cut by the byte length of the matched prefix: upper and lower case
            // forms may differ in byte length.
            return $replacement . \substr($str, \strlen($prefix));
        }

        return $str;
    }
6288
6289
    /**
6290
     * Replaces $search from the ending of string with $replacement.
6291
     *
6292
     * @param string $str         <p>The input string.</p>
6293
     * @param string $search      <p>The string to search for.</p>
6294
     * @param string $replacement <p>The replacement.</p>
6295
     *
6296
     * @psalm-pure
6297
     *
6298
     * @return string
6299
     *                <p>The string after the replacement.</p>
6300
     */
6301 17
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Existing contract: an empty needle appends the replacement to the input
        // (this also covers the "everything empty" case).
        if ($search === '') {
            return $str . $replacement;
        }

        // A non-empty needle can never be the suffix of an empty string.
        if ($str === '') {
            return '';
        }

        $str_bytes = \strlen($str);
        $search_bytes = \strlen($search);

        // Fast path: byte-wise suffix comparison (ASCII case folding). The length
        // guard keeps the offset inside the string; an out-of-range negative offset
        // raises a ValueError on PHP 8.
        if (
            $search_bytes <= $str_bytes
            &&
            \substr_compare($str, $search, -$search_bytes, $search_bytes, true) === 0
        ) {
            return \substr($str, 0, $str_bytes - $search_bytes) . $replacement;
        }

        // Unicode path: compare the last N characters (N = needle length in code
        // points) with multibyte case folding.
        $suffix = (string) \mb_substr($str, -((int) \mb_strlen($search, 'UTF-8')), null, 'UTF-8');
        $suffix_bytes = \strlen($suffix);
        if (
            $suffix_bytes > 0
            &&
            $suffix_bytes <= $str_bytes
            &&
            \substr_compare($str, $suffix, -$suffix_bytes) === 0
            &&
            \mb_stripos($suffix, $search, 0, 'UTF-8') === 0
        ) {
            // Cut by the byte length of the matched suffix: upper and lower case
            // forms may differ in byte length.
            return \substr($str, 0, $str_bytes - $suffix_bytes) . $replacement;
        }

        return $str;
    }
6323
6324
    /**
6325
     * Check if the string starts with the given substring, case-insensitive.
6326
     *
6327
     * EXAMPLE: <code>
6328
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6329
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6330
     * </code>
6331
     *
6332
     * @param string $haystack <p>The string to search in.</p>
6333
     * @param string $needle   <p>The substring to search for.</p>
6334
     *
6335
     * @psalm-pure
6336
     *
6337
     * @return bool
6338
     */
6339 13
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // Every string, the empty one included, starts with the empty needle.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle is a prefix only if the case-insensitive search
        // finds it at position 0; an empty haystack cannot contain it at all.
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6351
6352
    /**
6353
     * Returns true if the string begins with any of $substrings, false otherwise.
6354
     *
6355
     * - case-insensitive
6356
     *
6357
     * @param string   $str        <p>The input string.</p>
6358
     * @param scalar[] $substrings <p>Substrings to look for.</p>
6359
     *
6360
     * @psalm-pure
6361
     *
6362
     * @return bool
6363
     *              <p>Whether or not $str starts with $substring.</p>
6364
     */
6365 5
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // Nothing to compare: empty input or an empty candidate list never matches.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: the candidates are only read, so a by-reference loop
        // would just leave a dangling reference behind.
        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, (string) $candidate)) {
                return true;
            }
        }

        return false;
    }
6383
6384
    /**
6385
     * Gets the substring after the first occurrence of a separator.
6386
     *
6387
     * @param string $str       <p>The input string.</p>
6388
     * @param string $separator <p>The string separator.</p>
6389
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6390
     *
6391
     * @psalm-pure
6392
     *
6393
     * @return string
6394
     */
6395 1
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can follow an empty separator, and an empty input has no tail.
        if ($str === '' || $separator === '') {
            return '';
        }

        // Search in the same encoding that is used for slicing below, so the offset
        // and the substring count characters the same way. The argument order mirrors
        // the strripos($str, $separator, 0, $encoding) call in
        // str_isubstr_before_last_separator().
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6423
6424
    /**
6425
     * Gets the substring after the last occurrence of a separator.
6426
     *
6427
     * @param string $str       <p>The input string.</p>
6428
     * @param string $separator <p>The string separator.</p>
6429
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6430
     *
6431
     * @psalm-pure
6432
     *
6433
     * @return string
6434
     */
6435 1
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can follow an empty separator, and an empty input has no tail.
        if ($str === '' || $separator === '') {
            return '';
        }

        // Search in the same encoding that is used for slicing below, so the offset
        // and the substring count characters the same way (same call shape as in
        // str_isubstr_before_last_separator()).
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6463
6464
    /**
6465
     * Gets the substring before the first occurrence of a separator.
6466
     *
6467
     * @param string $str       <p>The input string.</p>
6468
     * @param string $separator <p>The string separator.</p>
6469
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6470
     *
6471
     * @psalm-pure
6472
     *
6473
     * @return string
6474
     */
6475 1
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // An empty separator or an empty input yields no leading part.
        if ($str === '' || $separator === '') {
            return '';
        }

        // Search in the same encoding that is used for slicing below, so the offset
        // and the substring count characters the same way (same call shape as the
        // strripos() call in str_isubstr_before_last_separator()).
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6495
6496
    /**
6497
     * Gets the substring before the last occurrence of a separator.
6498
     *
6499
     * @param string $str       <p>The input string.</p>
6500
     * @param string $separator <p>The string separator.</p>
6501
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6502
     *
6503
     * @psalm-pure
6504
     *
6505
     * @return string
6506
     */
6507 1
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // An empty separator or an empty input yields no leading part.
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Position of the last case-insensitive occurrence, counted in characters.
        $position = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // Everything in front of that occurrence.
        $head = $is_utf8
            ? \mb_substr($str, 0, $position)
            : self::substr($str, 0, $position, $encoding);

        return (string) $head;
    }
6532
6533
    /**
6534
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
6535
     *
6536
     * @param string $str           <p>The input string.</p>
6537
     * @param string $needle        <p>The string to look for.</p>
6538
     * @param bool   $before_needle [optional] <p>Default: false</p>
6539
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6540
     *
6541
     * @psalm-pure
6542
     *
6543
     * @return string
6544
     */
6545 2
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Without input or without a needle there is nothing to extract.
        if ($str === '' || $needle === '') {
            return '';
        }

        // Delegate to the case-insensitive "first occurrence" helper and
        // normalise its "not found" result (false) to an empty string.
        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6571
6572
    /**
6573
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
6574
     *
6575
     * @param string $str           <p>The input string.</p>
6576
     * @param string $needle        <p>The string to look for.</p>
6577
     * @param bool   $before_needle [optional] <p>Default: false</p>
6578
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6579
     *
6580
     * @psalm-pure
6581
     *
6582
     * @return string
6583
     */
6584 1
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Without input or without a needle there is nothing to extract.
        if ($str === '' || $needle === '') {
            return '';
        }

        // Delegate to the case-insensitive "last occurrence" helper and
        // normalise its "not found" result (false) to an empty string.
        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6610
6611
    /**
6612
     * Returns the last $n characters of the string.
6613
     *
6614
     * @param string $str      <p>The input string.</p>
6615
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6616
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6617
     *
6618
     * @psalm-pure
6619
     *
6620
     * @return string
6621
     */
6622 12
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Asking for zero/negative characters, or for the tail of nothing, gives ''.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // A negative start counts characters from the end of the string.
        $start = -$n;

        if ($encoding !== 'UTF-8') {
            $normalized = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, $start, null, $normalized);
        }

        return (string) \mb_substr($str, $start);
    }
6639
6640
    /**
6641
     * Limit the number of characters in a string.
6642
     *
6643
     * @param string $str        <p>The input string.</p>
6644
     * @param int    $length     [optional] <p>Default: 100</p>
6645
     * @param string $str_add_on [optional] <p>Default: …</p>
6646
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6647
     *
6648
     * @psalm-pure
6649
     *
6650
     * @return string
6651
     */
6652 2
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: return the input untouched (no add-on).
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on, but never let the kept length go negative:
            // a negative length would cut from the END of the string and could return
            // more characters than were asked for.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string it is appended to.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6679
6680
    /**
6681
     * Limit the number of characters in a string, but also after the next word.
6682
     *
6683
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
6684
     *
6685
     * @param string $str        <p>The input string.</p>
6686
     * @param int    $length     [optional] <p>Default: 100</p>
6687
     * @param string $str_add_on [optional] <p>Default: …</p>
6688
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6689
     *
6690
     * @psalm-pure
6691
     *
6692
     * @return string
6693
     */
6694 6
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other encoding through the class helpers.
        $is_utf8 = $encoding === 'UTF-8';

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Short enough already: return the input untouched (no add-on).
        if ($char_count <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut in front of it.
        $boundary_char = $is_utf8
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);

        if ($boundary_char === ' ') {
            $head = $is_utf8
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        // Hard cut at the limit first ...
        $truncated = $is_utf8
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);

        if (!$is_utf8 && $truncated === false) {
            return '' . $str_add_on;
        }

        // ... then drop the last (possibly cut-off) word: a negative explode limit
        // returns all pieces except the final one.
        $without_last_word = \implode(' ', \explode(' ', $truncated, -1));

        if ($without_last_word !== '') {
            return $without_last_word . $str_add_on;
        }

        // No complete word fits: fall back to a plain character cut.
        $head = $is_utf8
            ? \mb_substr($truncated, 0, $length - 1)
            : self::substr($truncated, 0, $length - 1, $encoding);

        return ((string) $head) . $str_add_on;
    }
6746
6747
    /**
6748
     * Returns the longest common prefix between the $str1 and $str2.
6749
     *
6750
     * @param string $str1     <p>The input string.</p>
6751
     * @param string $str2     <p>Second string for comparison.</p>
6752
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6753
     *
6754
     * @psalm-pure
6755
     *
6756
     * @return string
6757
     */
6758 10
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // The common prefix cannot be longer than the shorter string.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            $i = 0;
            while ($i < $limit) {
                $char = \mb_substr($str1, $i, 1);

                // Stop at the first position where the two strings differ.
                if ($char === false || $char !== \mb_substr($str2, $i, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$i;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $i = 0;
        while ($i < $limit) {
            $char = self::substr($str1, $i, 1, $encoding);

            // Stop at the first position where the two strings differ.
            if ($char === false || $char !== self::substr($str2, $i, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$i;
        }

        return $prefix;
    }
6810
6811
    /**
6812
     * Returns the longest common substring between the $str1 and $str2.
6813
     * In the case of ties, it returns that which occurs first.
6814
     *
6815
     * @param string $str1
6816
     * @param string $str2     <p>Second string for comparison.</p>
6817
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6818
     *
6819
     * @psalm-pure
6820
     *
6821
     * @return string
6822
     *                <p>A string with its $str being the longest common substring.</p>
6823
     */
6824 11
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Decided AFTER normalisation on purpose: an alias that normalises to
        // "UTF-8" takes the mbstring path for character access.
        $use_mb = $encoding === 'UTF-8';

        // Extract the characters of $str2 once instead of once per outer iteration.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len: length of the best run so far; $end: 1-based end position in $str1.
        $len = 0;
        $end = 0;

        // Each DP cell only depends on the previous row, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            // Loop-invariant for the inner loop: fetch the $str1 character once.
            $str_char = $use_mb
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];
            for ($j = 1; $j <= $other_length; ++$j) {
                $run = $str_char === $other_chars[$j - 1]
                    ? $previous_row[$j - 1] + 1
                    : 0;

                $current_row[$j] = $run;

                // Strictly greater, so the first of several equally long runs wins.
                if ($run > $len) {
                    $len = $run;
                    $end = $i;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6901
6902
    /**
6903
     * Returns the longest common suffix between the $str1 and $str2.
6904
     *
6905
     * @param string $str1
6906
     * @param string $str2     <p>Second string for comparison.</p>
6907
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6908
     *
6909
     * @psalm-pure
6910
     *
6911
     * @return string
6912
     */
6913 10
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // The common suffix cannot be longer than the shorter string.
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards: a negative offset addresses characters from the end.
            $i = 1;
            while ($i <= $limit) {
                $char = \mb_substr($str1, -$i, 1);

                if ($char === false || $char !== \mb_substr($str2, -$i, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$i;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Walk backwards: a negative offset addresses characters from the end.
        $i = 1;
        while ($i <= $limit) {
            $char = self::substr($str1, -$i, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, -$i, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$i;
        }

        return $suffix;
    }
6968
6969
    /**
6970
     * Returns true if $str matches the supplied pattern, false otherwise.
6971
     *
6972
     * @param string $str     <p>The input string.</p>
6973
     * @param string $pattern <p>Regex pattern to match against.</p>
6974
     *
6975
     * @psalm-pure
6976
     *
6977
     * @return bool
6978
     *              <p>Whether or not $str matches the pattern.</p>
6979
     */
6980 10
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern is wrapped in "/" delimiters below, so a bare "/" inside it
        // would end the regex early and make the match fail. Escape only slashes that
        // are not escaped yet, i.e. those preceded by an even number of backslashes;
        // an already escaped "\/" is left as it is.
        $escaped = \preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\\\/', $pattern);
        if ($escaped === null) {
            // Escaping failed for some reason: fall back to the caller's pattern.
            $escaped = $pattern;
        }

        // "u": treat pattern and subject as UTF-8.
        return (bool) \preg_match('/' . $escaped . '/u', $str);
    }
6984
6985
    /**
6986
     * Returns whether or not a character exists at an index. Offsets may be
6987
     * negative to count from the last character in the string. Implements
6988
     * part of the ArrayAccess interface.
6989
     *
6990
     * @param string $str      <p>The input string.</p>
6991
     * @param int    $offset   <p>The index to check.</p>
6992
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6993
     *
6994
     * @psalm-pure
6995
     *
6996
     * @return bool
6997
     *              <p>Whether or not the index exists.</p>
6998
     */
6999 6
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        // Character count of the string in the requested encoding.
        $length = (int) self::strlen($str, $encoding);

        // Negative offsets count from the end (-1 is the last character), so they
        // exist up to -$length; non-negative ones exist from 0 to $length - 1.
        return $offset < 0
            ? $length >= \abs($offset)
            : $length > $offset;
    }
7010
7011
    /**
7012
     * Returns the character at the given index. Offsets may be negative to
7013
     * count from the last character in the string. Implements part of the
7014
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
7015
     * does not exist.
7016
     *
7017
     * @param string $str      <p>The input string.</p>
7018
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
7019
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7020
     *
7021
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
7022
     *
7023
     * @return string
7024
     *                <p>The character at the specified index.</p>
7025
     *
7026
     * @psalm-pure
7027
     */
7028 2
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, the same way
        // str_offset_exists() does and the char_at() call below is told to.
        $length = (int) self::strlen($str, $encoding);

        // Valid indexes are 0 .. $length - 1 and, counted from the end, -1 .. -$length.
        $out_of_bounds = $index >= 0
            ? $index >= $length
            : \abs($index) > $length;

        if ($out_of_bounds) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7043
7044
    /**
7045
     * Pad a UTF-8 string to a given length with another string.
7046
     *
7047
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
7048
     *
7049
     * @param string     $str        <p>The input string.</p>
7050
     * @param int        $pad_length <p>The length of return string.</p>
7051
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
7052
     * @param int|string $pad_type   [optional] <p>
7053
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
7054
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
7055
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
7056
     *                               </p>
7057
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
7058
     *
7059
     * @psalm-pure
7060
     *
7061
     * @return string
7062
     *                <p>Returns the padded string.</p>
7063
     */
7064 41
    public static function str_pad(
7065
        string $str,
7066
        int $pad_length,
7067
        string $pad_string = ' ',
7068
        $pad_type = \STR_PAD_RIGHT,
7069
        string $encoding = 'UTF-8'
7070
    ): string {
7071 41
        if ($pad_length === 0 || $pad_string === '') {
7072 1
            return $str;
7073
        }
7074
7075 41
        if ($pad_type !== (int) $pad_type) {
7076 13
            if ($pad_type === 'left') {
7077 3
                $pad_type = \STR_PAD_LEFT;
7078 10
            } elseif ($pad_type === 'right') {
7079 6
                $pad_type = \STR_PAD_RIGHT;
7080 4
            } elseif ($pad_type === 'both') {
7081 3
                $pad_type = \STR_PAD_BOTH;
7082
            } else {
7083 1
                throw new \InvalidArgumentException(
7084 1
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
7085
                );
7086
            }
7087
        }
7088
7089 40
        if ($encoding === 'UTF-8') {
7090 25
            $str_length = (int) \mb_strlen($str);
7091
7092 25
            if ($pad_length >= $str_length) {
7093 25
                switch ($pad_type) {
7094
                    case \STR_PAD_LEFT:
7095 8
                        $ps_length = (int) \mb_strlen($pad_string);
7096
7097 8
                        $diff = ($pad_length - $str_length);
7098
7099 8
                        $pre = (string) \mb_substr(
7100 8
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7101
                            0,
7102
                            $diff
7103
                        );
7104 8
                        $post = '';
7105
7106 8
                        break;
7107
7108
                    case \STR_PAD_BOTH:
7109 14
                        $diff = ($pad_length - $str_length);
7110
7111 14
                        $ps_length_left = (int) \floor($diff / 2);
7112
7113 14
                        $ps_length_right = (int) \ceil($diff / 2);
7114
7115 14
                        $pre = (string) \mb_substr(
7116 14
                            \str_repeat($pad_string, $ps_length_left),
7117
                            0,
7118
                            $ps_length_left
7119
                        );
7120 14
                        $post = (string) \mb_substr(
7121 14
                            \str_repeat($pad_string, $ps_length_right),
7122
                            0,
7123
                            $ps_length_right
7124
                        );
7125
7126 14
                        break;
7127
7128
                    case \STR_PAD_RIGHT:
7129
                    default:
7130 9
                        $ps_length = (int) \mb_strlen($pad_string);
7131
7132 9
                        $diff = ($pad_length - $str_length);
7133
7134 9
                        $post = (string) \mb_substr(
7135 9
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7136
                            0,
7137
                            $diff
7138
                        );
7139 9
                        $pre = '';
7140
                }
7141
7142 25
                return $pre . $str . $post;
7143
            }
7144
7145 3
            return $str;
7146
        }
7147
7148 15
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
7149
7150 15
        $str_length = (int) self::strlen($str, $encoding);
7151
7152 15
        if ($pad_length >= $str_length) {
7153 14
            switch ($pad_type) {
7154
                case \STR_PAD_LEFT:
7155 5
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7156
7157 5
                    $diff = ($pad_length - $str_length);
7158
7159 5
                    $pre = (string) self::substr(
7160 5
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7161
                        0,
7162
                        $diff,
7163
                        $encoding
7164
                    );
7165 5
                    $post = '';
7166
7167 5
                    break;
7168
7169
                case \STR_PAD_BOTH:
7170 3
                    $diff = ($pad_length - $str_length);
7171
7172 3
                    $ps_length_left = (int) \floor($diff / 2);
7173
7174 3
                    $ps_length_right = (int) \ceil($diff / 2);
7175
7176 3
                    $pre = (string) self::substr(
7177 3
                        \str_repeat($pad_string, $ps_length_left),
7178
                        0,
7179
                        $ps_length_left,
7180
                        $encoding
7181
                    );
7182 3
                    $post = (string) self::substr(
7183 3
                        \str_repeat($pad_string, $ps_length_right),
7184
                        0,
7185
                        $ps_length_right,
7186
                        $encoding
7187
                    );
7188
7189 3
                    break;
7190
7191
                case \STR_PAD_RIGHT:
7192
                default:
7193 6
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7194
7195 6
                    $diff = ($pad_length - $str_length);
7196
7197 6
                    $post = (string) self::substr(
7198 6
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7199
                        0,
7200
                        $diff,
7201
                        $encoding
7202
                    );
7203 6
                    $pre = '';
7204
            }
7205
7206 14
            return $pre . $str . $post;
7207
        }
7208
7209 1
        return $str;
7210
    }
7211
7212
    /**
     * Pads a string on both sides until it reaches the requested length.
     *
     * Thin convenience wrapper: forwards all arguments to "UTF8::str_pad()"
     * with a $pad_type of \STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
7240
7241
    /**
     * Pads the beginning of a string until it reaches the requested length.
     *
     * Thin convenience wrapper: forwards all arguments to "UTF8::str_pad()"
     * with a $pad_type of \STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
7269
7270
    /**
     * Pads the end of a string until it reaches the requested length.
     *
     * Thin convenience wrapper: forwards all arguments to "UTF8::str_pad()"
     * with a $pad_type of \STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
7298
7299
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" first; the filtered
     * result is then handed to the native "\str_repeat()".
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7328
7329
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     * All arguments are forwarded unchanged to the native "\str_replace()".
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $result;
         */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
7388
7389
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. If $str does not start
     * with $search, $str is returned unchanged.
     *
     * Special case: an empty $search never matches as a prefix; instead
     * $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // also covers an empty $str: the result is then just $replacement
            return $str . $replacement;
        }

        $prefix_bytes = \strlen($search);

        // an empty or too-short $str can never match a non-empty $search
        if (\strncmp($str, $search, $prefix_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_bytes);
    }
7427
7428
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. If $str does not end
     * with $search (including the case where $search is longer than $str),
     * $str is returned unchanged.
     *
     * Special case: an empty $search never matches as a suffix; instead
     * $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // also covers an empty $str: the result is then just $replacement
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // Guard first: a needle longer than the haystack can never be a suffix.
        // (Passing the resulting out-of-range negative offset to "\strpos()"
        // raises a ValueError on PHP 8 and a warning on PHP 7.)
        if ($search_length > $str_length) {
            return $str;
        }

        if (\substr($str, -$search_length) === $search) {
            return \substr($str, 0, $str_length - $search_length) . $replacement;
        }

        return $str;
    }
7465
7466
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The position comes from "UTF8::strpos()"; when nothing is found the
     * subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, (int) self::strlen($search));
    }
7500
7501
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The position comes from "UTF8::strrpos()"; when nothing is found the
     * subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, (int) self::strlen($search));
    }
7534
7535
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses "\shuffle()", a random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        if ($encoding === 'UTF-8') {
            // fast path: plain mbstring calls
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            \shuffle($positions);

            $result = '';
            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        // any other encoding goes through the class' own strlen / substr
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        \shuffle($positions);

        $result = '';
        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
7582
7583
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * A non-negative $end that is not greater than $start yields an empty string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // "UTF-8" uses mbstring directly, everything else the class' own helpers
        $is_utf8 = ($encoding === 'UTF-8');
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            // both cases need the character count of the whole string
            $total = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $total : $total + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7634
7635
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps, in order:
     * 1. whitespace is normalized via "UTF8::normalize_whitespace()" and "-" becomes "_"
     * 2. every number character (\p{N}) and uppercase letter (\p{Lu}) is rewritten:
     *    a character that survives an int round-trip becomes "_<char>_",
     *    anything else becomes "_" followed by its lowercase form
     * 3. whitespace runs become "_", and repeated "_" are collapsed
     * 4. leading / trailing "_" and whitespace are trimmed
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // No "|" inside the character class: within [...] a pipe is a literal
            // character, not an alternation, and would wrongly match "|" in the input.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // a character that round-trips through int gets "_" on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7704
7705
    /**
     * Sort all characters according to code points.
     *
     * The string is turned into code points via "UTF8::codepoints()", sorted,
     * and turned back into a string via "UTF8::string()".
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves one entry per distinct code point
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7736
7737
    /**
     * Convert every entry of an array into an array of Unicode characters.
     *
     * Each entry is passed to "UTF8::str_split()" with the same options;
     * the array keys of $input are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $chunks_per_entry = [];

        foreach ($input as $key => $entry) {
            $chunks_per_entry[$key] = self::str_split(
                $entry,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        /** @var string[][] $chunks_per_entry */
        return $chunks_per_entry;
    }
7775
7776
    /**
     * Convert a string to an array of unicode characters.
     *
     * Strategy, in order of preference:
     * - "mb_str_split()" when mbstring is supported, allowed and the function exists
     * - per-character "mb_substr()" (up to 127 characters) or a PCRE "/./us" match
     * - a PCRE "/./us" match when only UTF-8 aware PCRE is supported
     * - a manual byte scanner as last resort (byte sequences it does not
     *   recognise as a complete UTF-8 character are skipped)
     *
     * For legacy reasons an array input is forwarded to "UTF8::str_split_array()".
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        // init
        $input = (string) $input;

        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native_chunks = \mb_str_split($input, $length);
                if ($native_chunks !== false) {
                    // already chunked by $length, nothing more to do
                    return $native_chunks;
                }
            }

            $char_count = \mb_strlen($input);
            if ($char_count <= 127) {
                $chars = [];
                for ($index = 0; $index < $char_count; ++$index) {
                    $chars[] = \mb_substr($input, $index, 1);
                }
            } else {
                $found = [];
                \preg_match_all('/./us', $input, $found);
                $chars = $found[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $found = [];
            \preg_match_all('/./us', $input, $found);
            $chars = $found[0] ?? [];
        } else {
            // fallback: walk the bytes and group them by the UTF-8 lead byte pattern

            $chars = [];
            $byte_count = \strlen($input);

            for ($pos = 0; $pos < $byte_count; ++$pos) {
                $lead = $input[$pos];

                // 0xxxxxxx: single byte
                if (($lead & "\x80") === "\x00") {
                    $chars[] = $lead;

                    continue;
                }

                // 110xxxxx: lead byte + 1 continuation byte
                if (isset($input[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
                    if (($input[$pos + 1] & "\xC0") === "\x80") {
                        $chars[] = $lead . $input[$pos + 1];

                        ++$pos;
                    }

                    continue;
                }

                // 1110xxxx: lead byte + 2 continuation bytes
                if (isset($input[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
                    if (
                        ($input[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $input[$pos + 1] . $input[$pos + 2];

                        $pos += 2;
                    }

                    continue;
                }

                // 11110xxx: lead byte + 3 continuation bytes
                if (isset($input[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
                    if (
                        ($input[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $input[$pos + 1] . $input[$pos + 2] . $input[$pos + 3];

                        $pos += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue single characters together into chunks of $length
            $joined = [];
            foreach (\array_chunk($chars, $length) as $chunk) {
                $joined[] = \implode('', $chunk);
            }

            return $joined;
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
7929
7930
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * NOTE: with mbstring support $pattern is handed to "\mb_split()" as-is;
     * in the PCRE fallback it is escaped with "\preg_quote()" first, so there
     * it is matched as literal text.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $parts = \mb_split($pattern, $str);
            if ($parts === false) {
                return [];
            }

            if ($limit < 0) {
                return $parts;
            }

            // keep only the first $limit pieces, the rest is dropped
            return \array_slice($parts, 0, $limit);
        }

        // Ask PCRE for one piece more than requested: the extra piece holds
        // the unsplit remainder and is removed again below.
        $limit = $limit > 0 ? $limit + 1 : -1;

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
        if ($parts === false) {
            return [];
        }

        if ($limit > 0 && \count($parts) === $limit) {
            \array_pop($parts);
        }

        return $parts;
    }
7999
8000
    /**
8001
     * Check if the string starts with the given substring.
8002
     *
8003
     * EXAMPLE: <code>
8004
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
8005
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
8006
     * </code>
8007
     *
8008
     * @param string $haystack <p>The string to search in.</p>
8009
     * @param string $needle   <p>The substring to search for.</p>
8010
     *
8011
     * @psalm-pure
8012
     *
8013
     * @return bool
8014
     */
8015 19
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // Every string (even an empty one) starts with the empty string.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle can never be found in an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // Prefer the native implementation where it exists, otherwise compare the leading bytes.
        /** @phpstan-ignore-next-line - only for PHP8 */
        return \PHP_VERSION_ID >= 80000
            ? \str_starts_with($haystack, $needle)
            : \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
8032
8033
    /**
8034
     * Returns true if the string begins with any of $substrings, false otherwise.
8035
     *
8036
     * - case-sensitive
8037
     *
8038
     * @param string   $str        <p>The input string.</p>
8039
     * @param scalar[] $substrings <p>Substrings to look for.</p>
8040
     *
8041
     * @psalm-pure
8042
     *
8043
     * @return bool
8044
     *              <p>Whether or not $str starts with $substring.</p>
8045
     */
8046 8
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // An empty input string, or an empty candidate list, can never produce a match.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: the candidates are only read, so a by-reference loop
        // would needlessly separate the array and leave a dangling reference.
        foreach ($substrings as $substring) {
            // Candidates are documented as scalars, hence the explicit string cast.
            if (self::str_starts_with($str, (string) $substring)) {
                return true;
            }
        }

        return false;
    }
8064
8065
    /**
8066
     * Gets the substring after the first occurrence of a separator.
8067
     *
8068
     * @param string $str       <p>The input string.</p>
8069
     * @param string $separator <p>The string separator.</p>
8070
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8071
     *
8072
     * @psalm-pure
8073
     *
8074
     * @return string
8075
     */
8076 1
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing to search in, or nothing to search for.
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Fast path: rely on the native mbstring functions with their default encoding.
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own substr() wrapper (instead of the raw \mb_substr()), exactly like the
        // sibling "after_last" / "before_*" methods do, so every variant treats $encoding the same way.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8106
8107
    /**
8108
     * Gets the substring after the last occurrence of a separator.
8109
     *
8110
     * @param string $str       <p>The input string.</p>
8111
     * @param string $separator <p>The string separator.</p>
8112
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8113
     *
8114
     * @psalm-pure
8115
     *
8116
     * @return string
8117
     */
8118 1
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search in, or nothing to search for.
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Locate the last occurrence of the separator.
        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // Everything behind the separator is the result.
        if ($is_utf8) {
            return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
        }

        $start = $position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
8151
8152
    /**
8153
     * Gets the substring before the first occurrence of a separator.
8154
     *
8155
     * @param string $str       <p>The input string.</p>
8156
     * @param string $separator <p>The string separator.</p>
8157
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8158
     *
8159
     * @psalm-pure
8160
     *
8161
     * @return string
8162
     */
8163 1
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search in, or nothing to search for.
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Locate the first occurrence of the separator.
        $position = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // Everything in front of the separator is the result.
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
8197
8198
    /**
8199
     * Gets the substring before the last occurrence of a separator.
8200
     *
8201
     * @param string $str       <p>The input string.</p>
8202
     * @param string $separator <p>The string separator.</p>
8203
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8204
     *
8205
     * @psalm-pure
8206
     *
8207
     * @return string
8208
     */
8209 1
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing to search in, or nothing to search for.
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Locate the last occurrence of the separator.
        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // Everything in front of the separator is the result.
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        // The encoding name is normalized before it is handed to substr().
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $position, $normalized_encoding);
    }
8242
8243
    /**
8244
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
8245
     *
8246
     * @param string $str           <p>The input string.</p>
8247
     * @param string $needle        <p>The string to look for.</p>
8248
     * @param bool   $before_needle [optional] <p>Default: false</p>
8249
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
8250
     *
8251
     * @psalm-pure
8252
     *
8253
     * @return string
8254
     */
8255 2
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search in, or nothing to search for.
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other encoding through the class' own wrapper.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        // "false" signals that the needle does not occur in the string.
        if ($found === false) {
            return '';
        }

        return $found;
    }
8289
8290
    /**
8291
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
8292
     *
8293
     * @param string $str           <p>The input string.</p>
8294
     * @param string $needle        <p>The string to look for.</p>
8295
     * @param bool   $before_needle [optional] <p>Default: false</p>
8296
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
8297
     *
8298
     * @psalm-pure
8299
     *
8300
     * @return string
8301
     */
8302 2
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search in, or nothing to search for.
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other encoding through the class' own wrapper.
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        // "false" signals that the needle does not occur in the string.
        if ($found === false) {
            return '';
        }

        return $found;
    }
8336
8337
    /**
8338
     * Surrounds $str with the given substring.
8339
     *
8340
     * @param string $str
8341
     * @param string $substring <p>The substring to add to both sides.</p>
8342
     *
8343
     * @psalm-pure
8344
     *
8345
     * @return string
8346
     *                <p>A string with the substring both prepended and appended.</p>
8347
     */
8348 5
    public static function str_surround(string $str, string $substring): string
    {
        // Same wrapper on the left and on the right side of the input.
        return "{$substring}{$str}{$substring}";
    }
8352
8353
    /**
8354
     * Returns a trimmed string with the first letter of each word capitalized.
8355
     * Also accepts an array, $ignore, allowing you to list words not to be
8356
     * capitalized.
8357
     *
8358
     * @param string        $str
8359
     * @param string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
8360
     *                                                     null. Default: null</p>
8361
     * @param string        $encoding                      [optional] <p>Default: 'UTF-8'</p>
8362
     * @param bool          $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
8363
     *                                                     string.</p>
8364
     * @param string|null   $lang                          [optional] <p>Set the language for special cases: az,
8365
     *                                                     el, lt, tr</p>
8366
     * @param bool          $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
8367
     *                                                     e.g. ẞ -> ß</p>
8368
     * @param bool          $use_trim_first                [optional] <p>true === trim the input string,
8369
     *                                                     first</p>
8370
     * @param string|null   $word_define_chars             [optional] <p>A string of chars that will be used as
8371
     *                                                     whitespace separator === words.</p>
8372
     *
8373
     * @psalm-pure
8374
     *
8375
     * @return string
8376
     *                <p>The titleized string.</p>
8377
     */
8378 10
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // The plain mbstring functions are sufficient as long as no language-specific
        // casing rules and no length-preserving casing are requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Quote the extra separators for use inside the character class below. Deliberately
        // no truthiness check here: that would silently discard the valid separator "0".
        // (null and '' both end up as an empty string.)
        $word_define_chars = \preg_quote((string) $word_define_chars, '/');

        // Every run of non-whitespace / non-separator chars counts as one word.
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // Words from the ignore-list are kept exactly as they are.
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    // First char upper-case, remainder lower-case.
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // Language-aware / length-preserving casing via the class' own helpers.
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8448
8449
    /**
8450
     * Convert a string into an obfuscated string.
8451
     *
8452
     * EXAMPLE: <code>
8453
     *
8454
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
8455
     * </code>
8456
     *
8457
     * @param string   $str
8458
     * @param float    $percent
8459
     * @param string   $obfuscateChar
8460
     * @param string[] $keepChars
8461
     *
8462
     * @psalm-pure
8463
     *
8464
     * @return string
8465
     *                <p>The obfuscated string.</p>
8466
     */
8467 1
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Occurrences of the obfuscation char that are already part of the input are
        // swapped for a placeholder while we work, and restored at the very end.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);
        $charsMaxChange = \round($charsMax * $percent);
        $charsCounter = 0;
        $charKeyDone = [];

        // Keep making random passes until enough chars have been processed. The second
        // condition guarantees termination once every char has been visited, which
        // otherwise never happens for "$percent > 1" (more changes requested than chars exist).
        while ($charsCounter < $charsMaxChange && \count($charKeyDone) < $charsMax) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // Random selection: roughly every second candidate is skipped in this pass.
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    // Nothing to obfuscate here; mark it as visited so that it cannot
                    // keep the outer loop alive forever.
                    $charKeyDone[$charKey] = true;

                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // Protected chars count towards the quota, but stay readable.
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        // Restore the obfuscation chars that were part of the original input.
        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8515
8516
    /**
8517
     * Returns a trimmed string in proper title case.
8518
     *
8519
     * Also accepts an array, $ignore, allowing you to list words not to be
8520
     * capitalized.
8521
     *
8522
     * Adapted from John Gruber's script.
8523
     *
8524
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
8525
     *
8526
     * @param string   $str
8527
     * @param string[] $ignore   <p>An array of words not to capitalize.</p>
8528
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8529
     *
8530
     * @psalm-pure
8531
     *
8532
     * @return string
8533
     *                <p>The titleized string.</p>
8534
     */
8535 35
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // Regex fragments for the "small words" that normally stay lower-case,
        // extended by the caller-supplied ignore-list.
        $small_word_patterns = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $small_words_rx = \implode('|', $small_word_patterns);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // An input without any lower-case char is lower-cased first,
        // so that the substitutions below have something to work with.
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($encoding): string {
                if ($groups[2]) {
                    // URLs, domains, emails and file paths stay untouched
                    $word = $groups[2];
                } elseif ($groups[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($groups[3], $encoding);
                } elseif ($groups[4]) {
                    // words without internal caps get a leading capital
                    $word = static::ucfirst($groups[4], $encoding);
                } else {
                    // every other kind of word (e.g. "iPhone") stays untouched
                    $word = $groups[5];
                }

                // leading and trailing underscores are preserved
                return $groups[1] . $word . $groups[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($encoding): string {
                $prefix = $groups[1];

                return $prefix . static::ucfirst($groups[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($encoding): string {
                $small_word = $groups[1];

                return static::ucfirst($small_word, $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($encoding): string {
                $small_word = $groups[1];

                return static::ucfirst($small_word, $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($encoding): string {
                $first_word_with_hyphen = $groups[1];

                return $first_word_with_hyphen . static::ucfirst($groups[2], $encoding);
            },
            $str
        );

        return $str;
    }
8706
8707
    /**
8708
     * Get a binary representation of a specific string.
8709
     *
8710
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
8711
     *
8712
     * @param string $str <p>The input string.</p>
8713
     *
8714
     * @psalm-pure
8715
     *
8716
     * @return false|string
8717
     *                      <p>false on error</p>
8718
     */
8719 2
    public static function str_to_binary(string $str)
    {
        // Convert byte by byte. Pushing the complete hex dump through base_convert() goes via
        // a float internally and therefore returns wrong bits for anything beyond a few bytes.
        $bits = '';
        $byte_count = \strlen($str);
        for ($i = 0; $i < $byte_count; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // Same output format as before: the value is written without leading zeros,
        // and an all-zero (or empty) input results in a single "0".
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8730
8731
    /**
8732
     * @param string   $str
8733
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
8734
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
8735
     *
8736
     * @psalm-pure
8737
     *
8738
     * @return string[]
8739
     */
8740 17
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // Result for "nothing to split" (empty input or a failed split).
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // One or two consecutive line-break chars act as a single separator.
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // Post-process only when at least one of the filters was requested.
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8770
8771
    /**
8772
     * Convert a string into an array of words.
8773
     *
8774
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
8775
     *
8776
     * @param string   $str
8777
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
8778
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
8779
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
8780
     *
8781
     * @psalm-pure
8782
     *
8783
     * @return string[]
8784
     */
8785 16
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // Character class describing what counts as part of a word: letters plus the extra chars.
        $char_list = self::rxClass($char_list, '\pL');

        // The words are the (captured) delimiters of the split, so the result alternates between
        // "text between words" and "word"; words may be joined internally by dashes / apostrophes.
        $return = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($return === false) {
            return $remove_empty_values ? [] : [''];
        }

        if (
            $remove_short_values === null
            &&
            !$remove_empty_values
        ) {
            return $return;
        }

        $tmp_return = self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );

        // Normalize every entry to a string. array_map() keeps the keys and, unlike a
        // by-reference foreach, leaves no dangling reference behind in the returned array.
        return \array_map(
            /**
             * @param mixed $item
             *
             * @return string
             */
            static function ($item): string {
                return (string) $item;
            },
            $tmp_return
        );
    }
8822
8823
    /**
8824
     * Truncates the string to a given length. If $substring is provided, and
8825
     * truncating occurs, the string is further truncated so that the substring
8826
     * may be appended without exceeding the desired length.
8827
     *
8828
     * @param string $str
8829
     * @param int    $length    <p>Desired length of the truncated string.</p>
8830
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
8831
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8832
     *
8833
     * @psalm-pure
8834
     *
8835
     * @return string
8836
     *                <p>A string after truncating.</p>
8837
     */
8838 22
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: nothing gets truncated and nothing gets appended.
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // Reserve room for the suffix. The value is clamped at zero, because a negative
            // length makes mb_substr() cut from the END of the string, which produced results
            // that were longer than requested (e.g. length 2 + '...' kept all but one char).
            $keep = \max(0, $length - (int) \mb_strlen($substring));

            return (string) \mb_substr($str, 0, $keep) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // Same clamping as in the UTF-8 fast path above.
        $keep = \max(0, $length - (int) self::strlen($substring, $encoding));

        return (
               (string) self::substr(
                   $str,
                   0,
                   $keep,
                   $encoding
               )
               ) . $substring;
    }
8882
8883
    /**
8884
     * Truncates the string to a given length, while ensuring that it does not
8885
     * split words. If $substring is provided, and truncating occurs, the
8886
     * string is further truncated so that the substring may be appended without
8887
     * exceeding the desired length.
8888
     *
8889
     * @param string $str
8890
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
8891
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
8892
     *                                                       Default:
8893
     *                                                       ''</p>
8894
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
8895
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
8896
     *
8897
     * @psalm-pure
8898
     *
8899
     * @return string
8900
     *                <p>A string after truncating.</p>
8901
     */
8902 47
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // The literal 'UTF-8' encoding uses the mb_* functions directly; every
        // other value is normalized first and goes through the class helpers.
        $use_mb_directly = $encoding === 'UTF-8';
        if (!$use_mb_directly) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_mb_directly
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $str_length) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= $use_mb_directly
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = $use_mb_directly
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // A space exactly at the cut position means no word was split.
        $space_position = $use_mb_directly
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);
        if ($space_position === $length) {
            return $truncated . $substring;
        }

        // The last word was cut: find the last space inside the truncated part.
        $last_position = $use_mb_directly
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        $cut_back = $last_position !== false
                    || ($space_position !== false && !$ignore_do_not_split_words_for_one_word);

        if ($cut_back) {
            // When no space was found, (int) false === 0 and the result is empty.
            $truncated = $use_mb_directly
                ? (string) \mb_substr($truncated, 0, (int) $last_position)
                : (string) self::substr($truncated, 0, (int) $last_position, $encoding);
        }

        return $truncated . $substring;
    }
8989
8990
    /**
8991
     * Returns a lowercase and trimmed string separated by underscores.
8992
     * Underscores are inserted before uppercase characters (with the exception
8993
     * of the first character of the string), and in place of spaces as well as
8994
     * dashes.
8995
     *
8996
     * @param string $str
8997
     *
8998
     * @psalm-pure
8999
     *
9000
     * @return string
9001
     *                <p>The underscored string.</p>
9002
     */
9003 16
    public static function str_underscored(string $str): string
    {
        // Delegate to the generic delimiter helper, using "_" as the separator.
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
9007
9008
    /**
9009
     * Returns an UpperCamelCase version of the supplied string. It trims
9010
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
9011
     * and underscores, and removes spaces, dashes, underscores.
9012
     *
9013
     * @param string      $str                           <p>The input string.</p>
9014
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
9015
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
9016
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
9017
     *                                                   tr</p>
9018
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
9019
     *                                                   -> ß</p>
9020
     *
9021
     * @psalm-pure
9022
     *
9023
     * @return string
9024
     *                <p>A string in UpperCamelCase.</p>
9025
     */
9026 13
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Camelize first (only $str and $encoding are forwarded to that step) ...
        $camelized = self::str_camelize($str, $encoding);

        // ... then upper-case the first character with all remaining options.
        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9035
9036
    /**
9037
     * Get the number of words in a specific string.
9038
     *
9039
     * EXAMPLES: <code>
9040
     * // format: 0 -> return only word count (int)
9041
     * //
9042
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
9043
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
9044
     *
9045
     * // format: 1 -> return words (array)
9046
     * //
9047
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
9048
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
9049
     *
9050
     * // format: 2 -> return words with offset (array)
9051
     * //
9052
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
9053
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
9054
     * </code>
9055
     *
9056
     * @param string $str       <p>The input string.</p>
9057
     * @param int    $format    [optional] <p>
9058
     *                          <strong>0</strong> => return a number of words (default)<br>
9059
     *                          <strong>1</strong> => return an array of words<br>
9060
     *                          <strong>2</strong> => return an array of words with word-offset as key
9061
     *                          </p>
9062
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
9063
     *
9064
     * @psalm-pure
9065
     *
9066
     * @return int|string[]
9067
     *                      <p>The number of words in the string.</p>
9068
     */
9069 2
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // The loops below read the words from the odd indexes of this list and
        // treat the even indexes as the text between the words.
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $words = [];
            $position = (int) self::strlen($parts[0]);
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[$position] = $parts[$idx];
                // advance past the word and the separator that follows it
                $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            }

            return $words;
        }

        // any other format: only the number of words
        return (int) (($total - 1) / 2);
    }
9093
9094
    /**
9095
     * Case-insensitive string comparison.
9096
     *
9097
     * INFO: Case-insensitive version of UTF8::strcmp()
9098
     *
9099
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
9100
     *
9101
     * @param string $str1     <p>The first string.</p>
9102
     * @param string $str2     <p>The second string.</p>
9103
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9104
     *
9105
     * @psalm-pure
9106
     *
9107
     * @return int
9108
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9109
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
9110
     *             <strong>0</strong> if they are equal
9111
     */
9112 23
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical options ...
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... and compare the folded forms case-sensitively.
        return self::strcmp($folded1, $folded2);
    }
9136
9137
    /**
9138
     * Case-sensitive string comparison.
9139
     *
9140
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
9141
     *
9142
     * @param string $str1 <p>The first string.</p>
9143
     * @param string $str2 <p>The second string.</p>
9144
     *
9145
     * @psalm-pure
9146
     *
9147
     * @return int
9148
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9149
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9150
     *             <strong>0</strong> if they are equal
9151
     */
9152 29
    public static function strcmp(string $str1, string $str2): int
    {
        // Identical byte sequences are equal; no normalization needed.
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the NFD forms of both strings.
        //
        // Normalizer::normalize() returns false when it cannot normalize its
        // input (e.g. invalid UTF-8). With strict_types enabled, handing that
        // false to \strcmp() would throw a TypeError, so fall back to the
        // original string in that case.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if (!\is_string($normalized1)) {
            $normalized1 = $str1;
        }

        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if (!\is_string($normalized2)) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
9165
9166
    /**
9167
     * Find length of initial segment not matching mask.
9168
     *
9169
     * @param string   $str
9170
     * @param string   $char_list
9171
     * @param int      $offset
9172
     * @param int|null $length
9173
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9174
     *
9175
     * @psalm-pure
9176
     *
9177
     * @return int
9178
     */
9179 12
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // With an empty mask nothing can match: the whole string is the segment.
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Restrict the search to the requested slice of the input.
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $slice = \mb_substr($str, $offset);
            } else {
                $slice = \mb_substr($str, $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // Lazily capture everything in front of the first char from the mask.
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char of the mask occurs (or the match failed): full length
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9228
9229
    /**
9230
     * Create a UTF-8 string from code points.
9231
     *
9232
     * INFO: opposite to UTF8::codepoints()
9233
     *
9234
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
9235
     *
9236
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
9237
     *
9238
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
9239
     *
9240
     * @psalm-pure
9241
     *
9242
     * @return string
9243
     *                <p>A UTF-8 encoded string.</p>
9244
     */
9245 4
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // Accept a single code point as well as a list of code points.
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // Build one numeric HTML entity ("&#<int>;") per code point.
        $entities = '';
        foreach ($code_points as $code_point) {
            $entities .= '&#' . (int) $code_point . ';';
        }

        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // The explicit cast keeps the declared "string" return type intact even
        // if mb_convert_encoding() reports a failure (non-string result); an
        // empty string is returned in that case.
        // NOTE(review): PHP 8.2 deprecates the 'HTML-ENTITIES' mbstring encoding;
        // a replacement (e.g. mb_chr()) needs checking for invalid code points.
        return (string) \mb_convert_encoding($entities, 'UTF-8', 'HTML-ENTITIES');
    }
9264
9265
    /**
9266
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
9267
     *
9268
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
9269
     *
9270
     * @param string $str <p>The input string.</p>
9271
     *
9272
     * @psalm-pure
9273
     *
9274
     * @return bool
9275
     *              <p>
9276
     *              <strong>true</strong> if the string has BOM at the start,<br>
9277
     *              <strong>false</strong> otherwise
9278
     *              </p>
9279
     */
9280 40
    public static function string_has_bom(string $str): bool
    {
        // self::$BOM maps each BOM byte sequence to its length in bytes.
        // Iterate by value: the loop never modifies the entries, and a by-reference
        // foreach would leave a dangling reference into the static array.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // byte-wise comparison of the leading bytes against this BOM
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9290
9291
    /**
9292
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
9293
     *
9294
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
9295
     *
9296
     * @see http://php.net/manual/en/function.strip-tags.php
9297
     *
9298
     * @param string      $str            <p>
9299
     *                                    The input string.
9300
     *                                    </p>
9301
     * @param string|null $allowable_tags [optional] <p>
9302
     *                                    You can use the optional second parameter to specify tags which should
9303
     *                                    not be stripped.
9304
     *                                    </p>
9305
     *                                    <p>
9306
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
9307
     *                                    can not be changed with allowable_tags.
9308
     *                                    </p>
9309
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9310
     *
9311
     * @psalm-pure
9312
     *
9313
     * @return string
9314
     *                <p>The stripped string.</p>
9315
     */
9316 4
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // Optionally clean the input before the tags are stripped.
        $input = $clean_utf8 ? self::clean($str) : $str;

        // Only forward the allow-list to the native function when one was given.
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9335
9336
    /**
9337
     * Strip all whitespace characters. This includes tabs and newline
9338
     * characters, as well as multibyte whitespace such as the thin space
9339
     * and ideographic space.
9340
     *
9341
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
9342
     *
9343
     * @param string $str
9344
     *
9345
     * @psalm-pure
9346
     *
9347
     * @return string
9348
     */
9349 36
    public static function strip_whitespace(string $str): string
    {
        // Remove every run of characters matched by [[:space:]] in unicode mode;
        // the cast turns a failed replacement (null) into an empty string.
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9357
9358
    /**
9359
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
9360
     *
9361
     * INFO: use UTF8::stripos_in_byte() for the byte-length
9362
     *
9363
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
9364
     *
9365
     * @see http://php.net/manual/en/function.mb-stripos.php
9366
     *
9367
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9368
     * @param string $needle     <p>The string to find in haystack.</p>
9369
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
9370
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9371
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9372
     *
9373
     * @psalm-pure
9374
     *
9375
     * @return false|int
9376
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
9377
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
9378
     */
9379 25
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // On PHP >= 8 an empty needle in an empty haystack yields position 0.
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // Literal 'UTF-8' with mbstring needs no encoding normalization.
        if ($has_mbstring && $encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($has_mbstring) {
            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        // Fallback via intl: "grapheme_stripos()" can't handle other encodings
        // and can't handle a negative offset.
        $can_use_grapheme = $encoding === 'UTF-8'
                            && $offset >= 0
                            && self::$SUPPORT['intl'] === true;
        if ($can_use_grapheme) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        // Fallback for ascii only.
        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        // Fallback via vanilla php: case-fold both sides, then search.
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9447
9448
    /**
9449
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
9450
     *
9451
     * EXAMPLE: <code>
9452
     * $str = 'iñtërnâtiônàlizætiøn';
9453
     * $search = 'NÂT';
9454
     *
9455
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
9456
     * UTF8::stristr($str, $search, true); // 'iñtër'
9457
     * </code>
9458
     *
9459
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
9460
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
9461
     * @param bool   $before_needle [optional] <p>
9462
     *                              If <b>TRUE</b>, it returns the part of the
9463
     *                              haystack before the first occurrence of the needle (excluding the needle).
9464
     *                              </p>
9465
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9466
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
9467
     *
9468
     * @psalm-pure
9469
     *
9470
     * @return false|string
9471
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
9472
     */
9473 13
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // On PHP >= 8 an empty needle is "found" in an empty haystack.
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return '';
            }

            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            if (\PHP_VERSION_ID >= 80000) {
                // Native PHP >= 8 matches an empty needle at offset 0, so the
                // part before the match is the empty string and the part from
                // the match onwards is the whole haystack.
                return $before_needle ? '' : $haystack;
            }

            return false;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        // fallback for ascii only
        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // fallback via vanilla php: lazily capture everything before the first
        // case-insensitive occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix and return the rest of the haystack
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
9553
9554
    /**
9555
     * Get the string length, not the byte-length!
9556
     *
9557
     * INFO: use UTF8::strwidth() for the char-length
9558
     *
9559
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
9560
     *
9561
     * @see http://php.net/manual/en/function.mb-strlen.php
9562
     *
9563
     * @param string $str        <p>The string being checked for length.</p>
9564
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9565
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9566
     *
9567
     * @psalm-pure
9568
     *
9569
     * @return false|int
9570
     *                   <p>
9571
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
9572
     *                   $encoding.
9573
     *                   (One multi-byte character counted as +1).
9574
     *                   <br>
9575
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
9576
     *                   chars.
9577
     *                   </p>
9578
     */
9579 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // 'UTF-8' and 'CP850' are used as given; anything else is normalized.
        $is_literal_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_literal_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        // 2) binary || ascii only: measure with the byte-wise \strlen()
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        $no_converter_for_encoding = $encoding !== 'UTF-8'
                                     && self::$SUPPORT['mbstring'] === false
                                     && self::$SUPPORT['iconv'] === false;
        if ($no_converter_for_encoding) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) iconv
        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        // 4) intl - "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        // 5) ascii only
        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        // 6) vanilla php: count the matches of "/./us"
        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // no match at all for a non-empty string is reported as a failure
        return $char_count === 0 ? false : $char_count;
    }
9684
9685
    /**
9686
     * Get string length in byte.
9687
     *
9688
     * @param string $str
9689
     *
9690
     * @psalm-pure
9691
     *
9692
     * @return int
9693
     */
9694 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with the
        // 8-bit 'CP850' encoding; otherwise the native \strlen() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
9707
9708
    /**
     * Compare two strings case-insensitively using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands first, then delegate to the case-sensitive variant
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_str1, $folded_str2);
    }
9739
9740
    /**
     * Compare two strings using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // both operands are passed through "strtonatfold()" before the native comparison
            $folded_str1 = (string) self::strtonatfold($str1);
            $folded_str2 = (string) self::strtonatfold($str2);

            return \strnatcmp($folded_str1, $folded_str2);
        }

        // identical input: nothing to fold or compare
        return 0;
    }
9776
9777
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * INFO: both strings are case-folded first and the folded strings are then
     *       compared via UTF8::strncmp() with the same $len and $encoding
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // NOTE(review): $len is applied to the case-folded strings; confirm that
        // "strtocasefold()" with these flags never changes the character count
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            // forward the charset, otherwise the truncation would always be done as "UTF-8"
            $encoding
        );
    }
9810
9811
    /**
     * Compare the first n characters of two strings.
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are used as given, everything else is normalized first
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // other charsets: cut both strings via the encoding-aware "substr()" of this class
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // "UTF-8": cut both strings directly via "mb_substr()"
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
9852
9853
    /**
     * Search a string for the first occurrence of any character out of a set.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found
     *                      (also false if one of the inputs is empty).</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build a regex from the char list and let PCRE find the first hit
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $match)) {
            return false;
        }

        // byte offset of the matched character, everything from there on is returned
        $byte_offset = (int) \strpos($haystack, $match[0]);

        return \substr($haystack, $byte_offset);
    }
9880
9881
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * The lookup is tried in this order: mbstring, plain "strpos()" for "CP850" / "ASCII",
     * intl (grapheme), iconv, plain "strpos()" for ascii-only input and finally a
     * vanilla php implementation.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: php >= 8 finds an empty needle at position 0, older versions never match
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // re-check with the needle converted into a string
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack: resolve it into
            // an absolute start position first, so that the position returned below
            // is relative to the whole haystack and not to the cut-off tail.
            $offset += (int) self::strlen($haystack, $encoding);
            if ($offset < 0) {
                $offset = 0;
            }
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position inside of the remaining haystack ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position and shifted by the skipped characters
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10054
10055
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit "CP850" charset
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
10087
10088
    /**
     * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit "CP850" charset
        return \mb_stripos($haystack, $needle, $offset, 'CP850');
    }
10120
10121
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * INFO: the fallbacks via iconv / vanilla php (used without mbstring)
     *       only search for the first character of the needle
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte_encoding && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both fallbacks only look for the first character of the needle
        $needle_first_char = self::substr($needle, 0, 1, $encoding);
        if ($needle_first_char === false) {
            return false;
        }

        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle_first_char, $encoding)
            : self::strrpos($haystack, $needle_first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10251
10252
    /**
     * Reverse the order of the characters in a string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // the string is passed through "emoji_encode()" before and "emoji_decode()" after the reversal
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk backwards, character by character, via the encoding-aware helpers
            for ($i = (int) self::strlen($str, $encoding) - 1; $i >= 0; --$i) {
                $char = self::substr($str, $i, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = (int) \grapheme_strlen($str) - 1; $i >= 0; --$i) {
                $char = \grapheme_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            // no intl: use the "mb_" functions
            for ($i = (int) \mb_strlen($str) - 1; $i >= 0; --$i) {
                $char = \mb_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
10309
10310
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * INFO: the fallback via vanilla php (used without mbstring)
     *       only searches for the first character of the needle
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched here
        $needle_first_char = self::substr($needle, 0, 1, $encoding);
        if ($needle_first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, $needle_first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10390
10391
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The lookup is tried in this order: mbstring, plain "strripos()" for "CP850" / "ASCII",
     * intl (grapheme), plain "strripos()" for ascii-only input and finally
     * UTF8::strrpos() on the case-folded strings.
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8_or_newer = \PHP_VERSION_ID >= 80000;

        // empty haystack: php >= 8 finds an empty needle at position 0, older versions never match
        if ($haystack === '') {
            if (!$is_php8_or_newer) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // re-check with the needle converted into a string
        if ($haystack === '') {
            return ($is_php8_or_newer && $needle === '') ? 0 : false;
        }

        if (!$is_php8_or_newer && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        $intl_usable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings and delegate to the case-sensitive search
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10522
10523
    /**
     * Find the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack or
     *                   needle is empty.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit "CP850" charset
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
10557
10558
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, plain byte functions (CP850 / ASCII), intl (grapheme), plain byte
     * functions for pure ASCII input and finally a pure PHP implementation.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked for the last occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>A non-negative int is treated as a
     *                               code point and converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of
     *                               characters into the string. Negative values will stop searching at an
     *                               arbitrary point prior to the end of the string.</p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the
     *                   haystack string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // early exit for an empty haystack, before the needle gets normalized
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        // an empty needle is only accepted on PHP >= 8
        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = $offset >= 0 // grapheme_strrpos() can't handle negative offset
            && $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $intl_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // cut the haystack down to the part that should be searched
        if ($offset !== 0) {
            if ($offset > 0) {
                $window = self::substr($haystack, $offset);
            } else {
                $window = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            if ($window !== null) {
                $haystack = $window === false ? '' : (string) $window;
            }
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // everything in front of the match; its length gives the position
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
10717
10718
    /**
     * Byte-wise search for the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>The string being checked for the last occurrence of needle.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of
     *                         characters into the string. Negative values will stop searching at an
     *                         arbitrary point prior to the end of the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the haystack string,
     *                   or false if the needle is not found or if either input is empty.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $has_empty_input = ($haystack === '' || $needle === '');
        if ($has_empty_input) {
            return false;
        }

        // If mbstring function overloading is active the "mb_" functions exist,
        // so call the "mb_" variant explicitly with an 8-bit charset (CP850).
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
10752
10753
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained
     * within a given mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars.</p>
     * @param int      $offset   [optional] <p>Start of the examined part of the input string.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of the input string.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment; 0 if the (sliced) input or the
     *                   mask is empty or if nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // only slice the input if the caller asked for a sub-range
        $needs_slice = $offset || $length !== null;
        if ($needs_slice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } else {
                $str = (string) (
                    $length === null
                        ? \mb_substr($str, $offset)
                        : \mb_substr($str, $offset, $length)
                );
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // match the longest prefix that consists of mask characters only
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
10800
10801
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, plain byte functions (CP850 / ASCII), intl (grapheme), plain byte
     * functions for pure ASCII input and finally a regex based implementation.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.<br>
     *                      An empty needle yields false on PHP < 8; on PHP >= 8 it matches at the very
     *                      beginning of the haystack.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return '';
            }

            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            // PHP >= 8 finds the empty needle at position 0: nothing precedes it,
            // and the whole haystack follows it.
            return $before_needle ? '' : $haystack;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strstr($haystack, $needle, $before_needle);
            }

            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first needle occurrence
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the part before the needle and return the rest
        return self::substr($haystack, (int) self::strlen($match[1]));
    }
10934
10935
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>The string that is searched.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack, or false if needle is not found
     *                      or if either input is empty.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        $has_empty_input = ($haystack === '' || $needle === '');
        if ($has_empty_input) {
            return false;
        }

        // If mbstring function overloading is active the "mb_" functions exist,
        // so call the "mb_" variant explicitly with an 8-bit charset (CP850).
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
10976
10977
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before any case conversion happens
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // plain UTF-8 without a language hint goes straight to mbstring,
        // everything else is routed through the UTF8 wrappers
        $is_plain_utf8 = ($lang === null && $encoding === 'UTF-8');

        if ($lower) {
            return $is_plain_utf8
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $is_plain_utf8
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11033
11034
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before any case conversion happens
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // load the list of available transliterators only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11113
11114
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before any case conversion happens
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the list of available transliterators only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11193
11194
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * If "to" is not an empty string, "from" and "to" are each split into single
     * characters (unless already arrays), the longer list is truncated to the length
     * of the shorter one and both are combined into a "from => to" replacement map.
     * If "to" is an empty string, a string "from" is removed from the input, while
     * an array "from" is used directly as the replacement map.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The characters / strings to replace, or a replacement map.</p>
     * @param string|string[] $to   [optional] <p>The characters / strings to translate to.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into a replacement map
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // truncate the longer list, so that every "from" has exactly one "to"
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep $from untouched until the map is known to be valid,
            // so that the exception message can still show the real input
            $replace_pairs = \array_combine($from, $to);
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $replace_pairs;
        }

        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11260
11261
    /**
     * Return the width of a string.
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
     *
     * With mbstring the result comes from mb_strwidth(). Without it, every character
     * that matches the wide-character ranges listed in this method counts as 2 and
     * every other character counts as 1.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and count how many were removed
        $wide_chars_pattern = '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u';
        $wide_count = 0;
        $narrow_rest = (string) \preg_replace($wide_chars_pattern, '', $str, -1, $wide_count);

        // wide characters count twice, the remaining ones once
        $narrow_count = (int) self::strlen($narrow_rest);

        return ($wide_count << 1) + $narrow_count;
    }
11320
11321
    /**
     * Get part of a string.
     *
     * The first available backend is used, in this order: mbstring (UTF-8 only),
     * byte-wise "substr()" (CP850 / ASCII), intl, iconv, byte-wise "substr()" for
     * pure ASCII input and finally a split-and-slice implementation in plain PHP.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. <b>FALSE</b> is returned if the length of
     *                      <i>str</i> cannot be determined in the fallback path, or if the
     *                      underlying native function itself returns <b>FALSE</b>.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring (only used for UTF-8 here)
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string: the offset points exactly behind the last character
        // ("$length === 0" already returned above, so only null is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        // &&
        // extract relevant part, and join to make string again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
11479
11480
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * If an offset or a length is given, $str1 is reduced to that portion first and
     * $str2 is cut to the same number of characters before both are compared.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure $str1 in the same encoding that is used to cut $str2,
                // otherwise the character count would be taken as UTF-8
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11541
11542
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. The haystack is only cut down if an offset is given
     *                             or if the length is greater than zero.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or <b>FALSE</b> if the needle is empty or the
     *                   length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing can be found in an empty haystack or in a zero-length window
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the literal matches of the needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11639
11640
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string; 0 if the haystack or the needle is empty.
     *                   <b>FALSE</b> if the haystack length cannot be determined, or
     *                   (before PHP 7.1, with "mbstring.func_overload") for a non-zero
     *                   offset and length whose sum is not positive.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // (PHP_VERSION_ID is major * 10000 + minor * 100 + release => 7.1.0 is 70100)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11721
11722
    /**
     * Count how often $substring occurs in the given string.
     *
     * The comparison is case-sensitive by default; pass false as $case_sensitive
     * to ignore the case.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences; 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        // other encodings: normalize the name and pass it on explicitly
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // UTF-8: compare the upper-cased strings if the case should be ignored
        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
11769
11770
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack
     *                if it does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to search in, or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11802
11803
    /**
     * Get part of a string, working on bytes instead of characters.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by the underlying
     *                      "substr()" / "mb_substr()" call.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to extract
        if ($length === 0 || $str === '') {
            return '';
        }

        // no offset and no limit: the whole string is requested
        if ($length === null && $offset === 0) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // without a limit the largest 32-bit integer is used as length
            return \substr($str, $offset, $length ?? 2147483647);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
11836
11837
    /**
     * Strip a suffix ($needle) from the end of the string ($haystack), ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack
     *                if it does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to search in, or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
11869
11870
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack
     *                if it does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to search in, or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11902
11903
    /**
     * Replace text within a portion of a string.
     *
     * If $str is an array, every element is processed on its own (with the given
     * $encoding) and an array is returned. In that case $replacement, $offset and
     * $length may be arrays too, and a missing $length means "insert" (length 0).
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     *
     * @template TSubstrReplace
     * @phpstan-param TSubstrReplace $str
     * @phpstan-return TSubstrReplace
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, one element at a time; the encoding has to be passed
            // on explicitly, otherwise every element would be handled as UTF-8
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string only takes the first replacement
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing one means "up to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12061
12062
    /**
     * Strips $needle from the end of $haystack, if $haystack ends with it.
     *
     * The suffix test is a byte-wise comparison and therefore case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>$haystack without the trailing $needle, or $haystack unchanged if it does not end with
     *                $needle (an empty $haystack yields an empty string).</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // byte-wise check: does $haystack end with $needle?
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // number of leading characters to keep
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12111
12112
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        $lower = $is_utf8 ? \mb_strtolower($str) : self::strtolower($str, $encoding);
        $upper = $is_utf8 ? \mb_strtoupper($str) : self::strtoupper($str, $encoding);

        // Fast path: "lower ^ upper ^ original" flips the case byte by byte. This is only
        // sound while all three strings have the same byte length; PHP truncates a string
        // XOR to the shortest operand, so a case mapping that changes the byte length
        // (e.g. "ı" (2 bytes) -> "I" (1 byte)) would misalign and cut off the result.
        $xor_swapped = (string) ($lower ^ $upper ^ $str);
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return $xor_swapped;
        }

        // Slow path: swap the case one character at a time.
        if ($is_utf8) {
            if (\preg_match_all('/./us', $str, $char_matches) === false) {
                // not splittable (e.g. invalid UTF-8): keep the legacy result instead of losing the input
                return $xor_swapped;
            }
            $chars = $char_matches[0];
        } else {
            $chars = [];
            $char_count = (int) self::strlen($str, $encoding);
            for ($i = 0; $i < $char_count; ++$i) {
                $chars[] = (string) self::substr($str, $i, 1, $encoding);
            }
        }

        $swapped = '';
        foreach ($chars as $char) {
            $char_lower = $is_utf8 ? \mb_strtolower($char) : self::strtolower($char, $encoding);
            if ($char_lower !== $char) {
                // the character was not lowercase => use its lowercase form
                $swapped .= $char_lower;

                continue;
            }

            // lowercase (or caseless) character => use its uppercase form
            $swapped .= $is_utf8 ? \mb_strtoupper($char) : self::strtoupper($char, $encoding);
        }

        return $swapped;
    }
12144
12145
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when the "mbstring" or "iconv" extension is not loaded
     * although its function ("mb_strlen" / "iconv") exists.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12172
12173
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        return \str_replace("\t", \str_repeat(' ', $tab_length), $str);
    }
12193
12194
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without $lang and without $try_to_keep_the_string_length the work is done by
     * "mb_convert_case()"; otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // language specific rules / length preservation are handled by str_titleize()
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        return \mb_convert_case(
            $str,
            \MB_CASE_TITLE,
            self::normalize_encoding($encoding, 'UTF-8')
        );
    }
12248
12249
    /**
     * Convert a string into ASCII.
     *
     * Thin wrapper around "ASCII::to_transliterate()".
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12270
12271
    /**
     * Converts a scalar value into a boolean.
     *
     * The value is cast to string and evaluated in this order:
     * - an empty string is false
     * - "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false (case-insensitive)
     * - a numeric string is true only if it is greater than zero
     * - anything else is true unless it is empty (or "0") after trimming
     *
     * @param bool|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // all keywords are lowercase, so one lowercase lookup covers exact and mixed-case input
        $normalized = \strtolower($value);
        if (isset($keywords[$normalized])) {
            return $keywords[$normalized];
        }

        if (\is_numeric($value)) {
            return ((float) $value) > 0;
        }

        return (bool) \trim($value);
    }
12314
12315
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Thin wrapper around "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        return ASCII::to_filename($str, $use_transliterate, $fallback_char);
    }
12338
12339
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively); every other value is
     * cast to string and passed to "UTF8::utf8_decode()".
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @template TToIso8859
     * @phpstan-param TToIso8859 $str
     * @phpstan-return TToIso8859
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
12371
12372
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * A string is passed to "UTF8::to_utf8_string()"; for an array every element is
     * passed to "UTF8::to_utf8_string()" and the keys are kept.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $str */
            $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $str;
        }

        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $str */
        return $str;
    }
12412
12413
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $byte = $str[$pos];

            if ($byte < "\xC0") {
                // 0x80-0xBF without a lead byte needs conversion, everything below is kept as it is
                $result .= ($byte & "\xC0") === "\x80"
                    ? self::to_utf8_convert_helper($byte)
                    : $byte;

                continue;
            }

            // how many continuation bytes does this lead byte announce?
            if ($byte <= "\xDF") {
                $expected = 1; // looks like 2 bytes UTF8
            } elseif ($byte <= "\xEF") {
                $expected = 2; // looks like 3 bytes UTF8
            } elseif ($byte <= "\xF7") {
                $expected = 3; // looks like 4 bytes UTF8
            } else {
                $expected = 0; // doesn't look like UTF8, but should be converted
            }

            // every announced byte must exist and must be in the range 0x80-0xBF
            $looks_like_utf8 = $expected > 0;
            for ($offset = 1; $looks_like_utf8 && $offset <= $expected; ++$offset) {
                $looks_like_utf8 = $pos + $offset < $byte_count
                                   && $str[$pos + $offset] >= "\x80"
                                   && $str[$pos + $offset] <= "\xBF";
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence
                $result .= \substr($str, $pos, $expected + 1);
                $pos += $expected;
            } else {
                // not valid UTF8 - convert the lead byte only
                $result .= self::to_utf8_convert_helper($byte);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);

                    $code_point = ($high_surrogate << 10)
                                  + $low_surrogate
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    $first_byte = (string) self::chr(0xC0 | ($code_point >> 6));
                    $second_byte = (string) self::chr(0x80 | ($code_point & 0x3F));

                    return $first_byte . $second_byte;
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        if ($result === null) {
            // preg_replace_callback() failed
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
12544
12545
    /**
     * Returns the given string as an integer, or null if the string isn't numeric.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
12563
12564
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and do not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        if ($input === null) {
            return null;
        }

        // plain scalars that have a lossless string form
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects are only accepted when they can be cast to string
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        return null;
    }
12603
12604
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null strips whitespace, an
     *                           empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means "strip nothing" (like the native "trim()").
        // Without this guard the pattern below would contain the character class "[]",
        // which is either invalid or swallows the following "]+|[" as its content.
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // the non-mbstring fallback needs the "/" delimiter to be quoted as well
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = "^[{$quoted_chars}]+|[{$quoted_chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
12649
12650
    /**
     * Makes string's first char uppercase.
     *
     * Without $lang and without $try_to_keep_the_string_length the first character is
     * uppercased via "mb_strtoupper()", otherwise via "UTF8::strtoupper()".
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // no language specific rules requested => the native mb-function is sufficient
        $plain_mb_is_enough = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $remainder = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            if ($plain_mb_is_enough) {
                return \mb_strtoupper($first_char) . $remainder;
            }

            return self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_is_enough) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first_char, $encoding) . $remainder;
        }

        $first_char = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $remainder;
    }
12726
12727
    /**
     * Uppercase for all words in the string.
     *
     * Pure ASCII input without $exceptions and without $char_list is handled by the native
     * "ucwords()"; everything else is split via "UTF8::str_to_words()" and every non-empty
     * part that is not listed in $exceptions is passed to "UTF8::ucfirst()".
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP, but it is not an empty input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // compare with '' instead of casting to bool, so that "0" counts as a given char / exception
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only; a "0" word must survive
            if ((string) $word === '') {
                continue;
            }

            if (
                $use_exceptions
                &&
                \in_array($word, $exceptions, true)
            ) {
                // excluded words are kept untouched
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
12795
12796
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * One decoding pass runs the string through "UTF8::to_utf8()", "UTF8::html_entity_decode()"
     * and the native "urldecode()". With $multi_decode the pass is repeated until the string
     * does not change anymore.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // the first pass always runs; further passes only in multi-decode mode
        // and only as long as the previous pass still changed something
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
12855
12856
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place, byte by byte:
     * - a 2-byte sequence becomes the matching single byte, or "?" if its code point is >= 256
     * - a 2-byte lead byte at the very end of the string (truncated sequence) becomes "?"
     * - a 3-byte or 4-byte sequence becomes "?"
     * - every other byte is copied as it is
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars [optional] <p>Return the unmodified input, if the decoded string has at
     *                                least as many characters (counted via "UTF8::strlen()") as the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // $i = read position, $j = write position (never ahead of $i)
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence: there is no second byte to read,
                        // "$str[++$i]" would access an uninitialized string offset
                        $str[$j] = $no_char_found;

                        break;
                    }

                    // 2-byte sequence: 5 bits from the lead byte + 6 bits from the next byte
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                case "\xF0":
                    // 4-byte sequence: skip one byte more than for a 3-byte sequence
                    ++$i;

                // no break

                case "\xE0":
                    // not representable in ISO-8859-1
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12928
12929
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // \utf8_encode() is deprecated since PHP 8.2, so prefer the mbstring conversion
        // and only fall back to the legacy function when mbstring is not available.
        if (\function_exists('mb_convert_encoding')) {
            /** @var false|string $encoded */
            $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        } else {
            /** @var false|string $encoded - the polyfill maybe return false */
            $encoded = \utf8_encode($str);
        }

        if (!\is_string($encoded)) {
            return '';
        }

        return $encoded;
    }
12955
12956
    /**
     * Returns the table of UTF-8 whitespace characters held by this class.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12971
12972
    /**
     * Limit the number of words in a string.
     *
     * Words are runs of non-whitespace characters. If the string contains more than
     * $limit words, it is cut after the last allowed word, trailing whitespace is
     * removed and $str_add_on is appended; otherwise the string is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if the input is empty or $limit is lower than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, followed by 1..$limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // no match at all (e.g. the regex could not be applied): nothing to shorten
        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // the matched part already covers the whole string: nothing was cut off
        if ((int) \mb_strlen($head) === \mb_strlen($str)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
13006
13007
    /**
     * Wraps a string to a given number of characters.
     *
     * The wrapping itself is delegated to PHP's byte-based \wordwrap(), which is run on a
     * single-byte "mask" of the input ("#" for an existing break, " " for a space, "?" for
     * any other character). The break positions found in the mask are then replayed on the
     * real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column
     *                (an empty string if $str or $break is empty).</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        // Build the list of real characters and, in parallel, the single-byte mask.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_index => $segment) {
            if ($segment_index) {
                // an already existing break between two segments
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $result = '';
        $char_pos = 0;   // position in $chars
        $mask_pos = -1;  // position in $mask
        $break_pos = -1; // position of the current "#" in $mask

        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy every character in front of the current break
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                if (isset($chars[$char_pos])) {
                    $result .= $chars[$char_pos];
                    unset($chars[$char_pos]);
                }
                ++$char_pos;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // The break replaced an existing break or a space: consume that character.
            // Otherwise the break was inserted (forced cut) and no character is consumed.
            if (
                $chars[$char_pos] === $break
                ||
                $chars[$char_pos] === ' '
            ) {
                unset($chars[$char_pos]);
                ++$char_pos;
            }

            $result .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // everything after the last break is appended unchanged
        return $result . \implode('', $chars);
    }
13099
13100
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * Each part is wrapped on its own via self::wordwrap() and the parts are joined again
     * with $delimiter (or "\n" if no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline
     *                                     ("\r\n", "\r" or "\n").
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped) . $suffix;
    }
13153
13154
    /**
     * Returns the list of Unicode White Space characters held by this class.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
13166
13167
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>True for an empty string and for valid UTF-8, false otherwise.</p>
     */
    private static function is_utf8_string(string $str, bool $strict = false): bool
    {
        if ($str === '') {
            return true;
        }

        if ($strict) {
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback without PCRE UTF-8 support: byte-wise state machine.
        $mState = 0; // expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // code point collected so far
        $mBytes = 1; // expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation: merge its 6 payload bits into the code point.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // reset the state for the next character
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // a sequence that is still open at the end of the input is invalid
        return $mState === 0;
    }
13319
13320
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The "common" table (self::$COMMON_CASE_FOLD) is always applied; the "full" table
     * (loaded once from the "caseFolding_full" data file) is applied in addition when
     * $use_full_case_fold is true.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ): string {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // the direction of the replacement depends on the requested target case
        if ($use_lowercase) {
            $search = $upper;
            $replace = $lower;
        } else {
            $search = $lower;
            $replace = $upper;
        }

        $str = \str_replace($search, $replace, $str);

        if ($use_full_case_fold) {
            /**
             * @psalm-suppress ImpureStaticVariable
             *
             * @var array<mixed>|null
             */
            static $FULL_CASE_FOLD = null;
            if ($FULL_CASE_FOLD === null) {
                $FULL_CASE_FOLD = self::getData('caseFolding_full');
            }

            // index 0 is replaced by index 1 for lowercase, the other way round otherwise
            if ($use_lowercase) {
                $str = \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
            } else {
                $str = \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
            }
        }

        return $str;
    }
13371
13372
    /**
     * Loads a data file from "/data/*.php" (relative to this file) and returns
     * whatever that file returns.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
13388
13389
    /**
     * Fills the emoji caches (keys, values and reversible placeholder keys) once.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>True if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // already initialized by an earlier call
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder per key, built from the CRC32 checksum of the key and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $key) {
            $checksum = (string) \crc32($key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13424
13425
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              True if the MB_OVERLOAD_STRING constant exists and its bit is set in the
     *              "mbstring.func_overload" INI value.
     *              </p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($func_overload & \MB_OVERLOAD_STRING);
    }
13445
13446
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param string[] $strings
     * @param bool     $remove_empty_values <p>Drop values that are empty after \trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop values whose length (\mb_strlen()) is lower than or equal
     *                                      to this number; null disables the length check.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ): array {
        // init
        $return = [];

        // iterate by value: the input is only read, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
13485
13486
    /**
     * Builds a regular expression fragment that matches any one of the characters in $s.
     *
     * Characters are collected into one bracket expression that starts with $class; a "-"
     * is put at the very front of it, and parts for which self::strlen() is not 1 become
     * separate alternatives of a non-capturing group. Results are cached per ($s, $class).
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Content that is put in front of the generated bracket expression content.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 holds the content of the bracket expression, further entries are alternatives
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" has to be the first character inside of the brackets
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape and add it to the bracket expression
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13541
13542
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split by $delimiter and the first character of every part is upper-cased
     * via self::ucfirst(), except for parts that
     * - are equal to one of the special-case names (e.g. "von", "de"),
     * - start with one of the special-case prefixes (e.g. "mc", "d'"), or
     * - start with one of the special-case names while the delimiter is "-".
     *
     * @param string $names
     * @param string $delimiter
     * @param string $encoding
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The re-joined names, or an empty string if $delimiter is empty.</p>
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ): string {
        // An empty delimiter cannot be used for splitting: \explode() returns false on
        // PHP 7 and throws a ValueError on PHP 8, so handle it up front.
        if ($delimiter === '') {
            return '';
        }

        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched; for "-" the special names count as beginnings, too.
        $protected_beginnings = $delimiter === '-'
            ? \array_merge($special_cases['names'], $special_cases['prefixes'])
            : $special_cases['prefixes'];

        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($protected_beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    continue 2;
                }
            }

            $name = self::ucfirst($name, $encoding);
        }
        // break the reference, so that later writes to $name cannot touch the array
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
13640
13641
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of non-spacing marks (\p{Mn})
     * is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>An empty string if the normalization failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return $decomposed === false
            ? ''
            : \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
13663
13664
    /**
     * Converts one input value (looked up in the "ord" table) into a UTF-8 sequence.
     *
     * If the ordinal has an entry in the Windows-1252 table, that entry is returned;
     * otherwise a two-byte sequence is built from the ordinal's high and low bits.
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Integer division keeps the array key an int; "/" would produce a float key,
            // which is deprecated as an implicit conversion since PHP 8.1.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[\intdiv($ordC1, 64)] | "\xC0";
            // string-wise bit operations: keep the low 6 bits and set the continuation marker
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
13700
13701
    /**
     * Rewrites "%uXXXX" escapes (3 or 4 hex digits) into HTML hex entities ("&#xXXXX;").
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input itself if it contains no such escape.</p>
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        // cheap pre-check, so that the regex is skipped for most inputs
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        // One pass is enough: without a match \preg_replace() returns the subject unchanged,
        // and on a regex error (null) the untouched input is kept.
        $replaced = \preg_replace('/%u([0-9a-fA-F]{3,4})/', '&#x\\1;', $str);

        return $replaced === null ? $str : $replaced;
    }
13721
}
13722