Passed
Push — master ( 794406...61f22d )
by Lars
32:22 queued 29:46
created

UTF8::iconv_loaded()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 2
b 0
f 0
nc 1
nop 0
dl 0
loc 3
ccs 0
cts 2
cp 0
crap 2
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
     * Bom => Byte-Length
     *
     * Every byte order mark is listed twice: once as its raw byte sequence
     * and once as the same mark seen as "WINDOWS-1252" text (one char has
     * [maybe] more than one byte, hence the larger lengths).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // "ï»¿" written as escapes so the 6-byte key survives editors / tools that strip BOM look-alikes
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two leading chars are presumably the CP1252 view of the NUL bytes - confirm
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        // NOTE(review): same assumption for the two trailing chars - confirm
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
     * Whitespace-like characters: numeric code point => UTF-8 character.
     *
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
     *
     * @var array<int, string>
     */
    private static $WHITESPACE = [
        0     => "\x0",          // NULL Byte
        9     => "\x9",          // Tab
        10    => "\xa",          // New Line
        11    => "\xb",          // Vertical Tab
        13    => "\xd",          // Carriage Return
        32    => "\x20",         // Ordinary Space
        160   => "\xc2\xa0",     // NO-BREAK SPACE
        5760  => "\xe1\x9a\x80", // OGHAM SPACE MARK
        6158  => "\xe1\xa0\x8e", // MONGOLIAN VOWEL SEPARATOR
        8192  => "\xe2\x80\x80", // EN QUAD
        8193  => "\xe2\x80\x81", // EM QUAD
        8194  => "\xe2\x80\x82", // EN SPACE
        8195  => "\xe2\x80\x83", // EM SPACE
        8196  => "\xe2\x80\x84", // THREE-PER-EM SPACE
        8197  => "\xe2\x80\x85", // FOUR-PER-EM SPACE
        8198  => "\xe2\x80\x86", // SIX-PER-EM SPACE
        8199  => "\xe2\x80\x87", // FIGURE SPACE
        8200  => "\xe2\x80\x88", // PUNCTUATION SPACE
        8201  => "\xe2\x80\x89", // THIN SPACE
        8202  => "\xe2\x80\x8a", // HAIR SPACE
        8232  => "\xe2\x80\xa8", // LINE SEPARATOR
        8233  => "\xe2\x80\xa9", // PARAGRAPH SEPARATOR
        8239  => "\xe2\x80\xaf", // NARROW NO-BREAK SPACE
        8287  => "\xe2\x81\x9f", // MEDIUM MATHEMATICAL SPACE
        65440 => "\xef\xbe\xa0", // HALFWIDTH HANGUL FILLER
        12288 => "\xe3\x80\x80", // IDEOGRAPHIC SPACE
    ];
93
94
    /**
     * Unicode character name => UTF-8 bytes of that whitespace character.
     *
     * @var array<string, string>
     */
    private static $WHITESPACE_TABLE = [
        'SPACE' => "\x20", // U+0020
        'NO-BREAK SPACE' => "\xc2\xa0", // U+00A0
        'OGHAM SPACE MARK' => "\xe1\x9a\x80", // U+1680
        'EN QUAD' => "\xe2\x80\x80", // U+2000
        'EM QUAD' => "\xe2\x80\x81", // U+2001
        'EN SPACE' => "\xe2\x80\x82", // U+2002
        'EM SPACE' => "\xe2\x80\x83", // U+2003
        'THREE-PER-EM SPACE' => "\xe2\x80\x84", // U+2004
        'FOUR-PER-EM SPACE' => "\xe2\x80\x85", // U+2005
        'SIX-PER-EM SPACE' => "\xe2\x80\x86", // U+2006
        'FIGURE SPACE' => "\xe2\x80\x87", // U+2007
        'PUNCTUATION SPACE' => "\xe2\x80\x88", // U+2008
        'THIN SPACE' => "\xe2\x80\x89", // U+2009
        'HAIR SPACE' => "\xe2\x80\x8a", // U+200A
        'LINE SEPARATOR' => "\xe2\x80\xa8", // U+2028
        'PARAGRAPH SEPARATOR' => "\xe2\x80\xa9", // U+2029
        'ZERO WIDTH SPACE' => "\xe2\x80\x8b", // U+200B
        'NARROW NO-BREAK SPACE' => "\xe2\x80\xaf", // U+202F
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f", // U+205F
        'IDEOGRAPHIC SPACE' => "\xe3\x80\x80", // U+3000
        'HALFWIDTH HANGUL FILLER' => "\xef\xbe\xa0", // U+FFA0
    ];
120
121
    /**
     * Two parallel lists of equal length: the entry at index N of "upper"
     * belongs to the entry at index N of "lower".
     *
     * NOTE(review): presumably consumed as search / replace pairs for case
     * folding - the consumer is not part of this excerpt, confirm there.
     *
     * @var array
     *
     * @phpstan-var array{upper: string[], lower: string[]}
     */
    private static $COMMON_CASE_FOLD = [
        'upper' => [
            'µ', 'ſ', "\xCD\x85", 'ς', 'ẞ',
            "\xCF\x90", "\xCF\x91", "\xCF\x95", "\xCF\x96",
            "\xCF\xB0", "\xCF\xB1", "\xCF\xB5",
            "\xE1\xBA\x9B", "\xE1\xBE\xBE",
        ],
        'lower' => [
            'μ', 's', 'ι', 'σ', 'ß',
            'β', 'θ', 'φ', 'π',
            'κ', 'ρ', 'ε',
            "\xE1\xB9\xA1", 'ι',
        ],
    ];
160
161
    /**
     * Environment feature flags, filled once by checkForSupport().
     *
     * @var array
     *
     * @phpstan-var array<string, mixed>
     */
    private static $SUPPORT = [];

    /**
     * Lazily initialised lookup table (null until it is loaded; the loader is not part of this excerpt).
     *
     * @var string[]|null
     *
     * @phpstan-var array<string, string>|null
     */
    private static $BROKEN_UTF8_FIX = null;

    /**
     * Lazily initialised lookup table (null until it is loaded; the loader is not part of this excerpt).
     *
     * @var string[]|null
     *
     * @phpstan-var array<int, string>|null
     */
    private static $WIN1252_TO_UTF8 = null;

    /**
     * Lazily initialised list (null until it is loaded; the loader is not part of this excerpt).
     *
     * @var string[]|null
     *
     * @phpstan-var array<int, string>|null
     */
    private static $INTL_TRANSLITERATOR_LIST = null;

    /**
     * Lazily initialised list (null until it is loaded; the loader is not part of this excerpt).
     *
     * @var string[]|null
     *
     * @phpstan-var array<string>|null
     */
    private static $ENCODINGS = null;

    /**
     * Lazily initialised lookup table (null until it is loaded; the loader is not part of this excerpt).
     *
     * @var int[]|null
     *
     * @phpstan-var array<string, int>|null
     */
    private static $ORD = null;

    /**
     * Lazily initialised lookup table (null until it is loaded; the loader is not part of this excerpt).
     *
     * @var string[]|null
     *
     * @phpstan-var array<string, string>|null
     */
    private static $EMOJI = null;

    /**
     * Cache (null until it is filled; the code filling it is not part of this excerpt).
     *
     * @var string[]|null
     *
     * @phpstan-var array<string>|null
     */
    private static $EMOJI_VALUES_CACHE = null;

    /**
     * Cache (null until it is filled; the code filling it is not part of this excerpt).
     *
     * @var string[]|null
     *
     * @phpstan-var array<string>|null
     */
    private static $EMOJI_KEYS_CACHE = null;

    /**
     * Cache (null until it is filled; the code filling it is not part of this excerpt).
     *
     * @var string[]|null
     *
     * @phpstan-var array<string>|null
     */
    private static $EMOJI_KEYS_REVERSIBLE_CACHE = null;

    /**
     * Integer => character table, loaded on demand by chr() via getData('chr').
     *
     * @var string[]|null
     *
     * @phpstan-var array<int, string>|null
     */
    private static $CHR = null;
237
238
    /**
     * Empty constructor: creating an instance performs no work.
     */
    public function __construct()
    {
        // intentionally left blank
    }
244
245
    /**
     * Pick the single character found at a given position, similar to $str[1]
     * but counting characters instead of bytes.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string for an empty
     *                input or a negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // negative positions are rejected here instead of counting from the end
        if ($pos < 0 || $str === '') {
            return '';
        }

        // the UTF-8 fast path relies on the current mbstring internal encoding
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
271
272
    /**
     * Make sure the string starts with a byte order mark.
     *
     * INFO: A string for which string_has_bom() reports a BOM is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
294
295
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * If two keys become identical after the conversion, the later entry
     * overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other
     *                                       value is treated as CASE_LOWER.</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased, values untouched.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // everything that is not explicitly CASE_UPPER falls back to lower-casing
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: the values are only copied, so a reference would
        // merely force a separation of the input array and leave a dangling
        // reference behind
        foreach ($array as $key => $value) {
            // PHP turns numeric string keys into integers, the case helpers work on strings
            $key = (string) $key;

            $key = $to_upper
                ? self::strtoupper($key, $encoding)
                : self::strtolower($key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
     * Extract the text enclosed by two delimiters.
     *
     * The search for $start begins at $offset; $end is searched behind the
     * end of the first $start match. An empty string is returned if one of
     * the delimiters is not found or if nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: go through the encoding aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $start_at = self::strpos($str, $start, $offset, $encoding);
            if ($start_at === false) {
                return '';
            }

            $from = $start_at + (int) self::strlen($start, $encoding);
            $until = self::strpos($str, $end, $from, $encoding);
            if ($until === false || $until === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $until - $from, $encoding);
        }

        // fast path: plain "mb_" functions with the current internal encoding
        $start_at = \mb_strpos($str, $start, $offset);
        if ($start_at === false) {
            return '';
        }

        $from = $start_at + (int) \mb_strlen($start);
        $until = \mb_strpos($str, $end, $from);
        if ($until === false || $until === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $until - $from);
    }
398
399
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * The digits are read as one big-endian number: leading zeros carry no
     * information and are dropped, the remainder is left-padded to whole
     * bytes. Characters other than "0" and "1" are ignored.
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes, or an empty string for empty / all-zero / non-string input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep only binary digits and drop the leading zeros (an all-zero input decodes to '')
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Decode byte by byte instead of via base_convert(): that function
        // works on floats and loses precision for long inputs, and it drops a
        // leading zero nibble, which shifts every following hex digit.
        $missing = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $octet) {
            $str .= \chr((int) \bindec($octet));
        }

        return $str;
    }
425
426
    /**
     * Get the three bytes of the UTF-8 byte order mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xEF\xBB\xBF";

        return $utf8_bom;
    }
442
443
    /**
     * Alias of UTF8::chr_map(): hands both arguments over unchanged.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
459
460
    /**
     * Get the single character located at $index (the first character has index 0).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 goes straight to "mb_substr", everything else through UTF8::substr()
        return (string) (
            $encoding === 'UTF-8'
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding)
        );
    }
480
481
    /**
     * Split the string into its characters via UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
496
497
    /**
     * Detect once which string related extensions are usable and remember
     * the result in self::$SUPPORT.
     *
     * @return true|null
     *                   <p>true if the detection ran in this call, null if it had already run before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        // set the marker first, so the detection below is not started a second time
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // when the polyfill provides the "mb_" functions, it gets the same default encoding
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per "code point + encoding" in a function-static cache.
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (non-integer input, values &lt;= 0 or values above U+10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // the parameter is untyped, so the runtime type check is still needed;
        // values above U+10FFFF are not Unicode code points and would index
        // the byte table below out of range
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only real single-byte (ASCII) code points may be read straight from
        // the byte table: U+0080 already needs two bytes in UTF-8 and the
        // table entry for 0x80 is used as a continuation byte further down.
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);

            // "IntlChar" reports a failure as null: do not encode or cache it
            if ($chr === null) {
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if ($code_point <= 0x7FF) {
            // 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
     * Run a callback on every character of a string and collect the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
696
697
    /**
     * List the number of bytes each character of a Unicode string occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // regular case: "strlen" counts bytes
            return \array_map('\strlen', self::str_split($str));
        }

        // with function overloading the byte count has to be taken via an 8-bit charset
        $byte_length = static function (string $data): int {
            // "mb_" is available if overload is used, so use it ...
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byte_length, self::str_split($str));
    }
732
733
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * Only the first character of the input is evaluated.
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point, 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // there is nothing to decode: answer like "\ord('')" instead of failing
        // on "unpack()" or on an uninitialized string offset below
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            // "unpack('V')" needs four bytes: fall through to the manual
            // decoder if the conversion failed or produced less than that
            if ($chr_tmp !== false && isset($chr_tmp[3])) {
                /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells how many bytes belong to the sequence,
        // its marker bits are removed from the value
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the six payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
785
786
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional]
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the html entity of the NULL byte is handed to UTF8::ord() as an empty string
        $input = $char === '&#0;' ? '' : $char;

        $code_point = self::ord((string) $input);

        return self::int_to_hex($code_point, $prefix);
    }
811
812
    /**
     * Cut a string into chunks of characters and glue them together with a line ending.
     *
     * INFO: the line ending is only placed between the chunks, not behind the last one.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
830
831
    /**
     * Drop every byte that is not part of a well-formed UTF-8 sequence and
     * optionally run further normalizations on the result.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
     *                                                        invisible url encoded characters e.g.: "%0B"<br>
     *                                                        WARNING: maybe contains false-positives e.g.
     *                                                        aa%0Baa -> aaaa.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 collects runs of well-formed sequences (at most 100 per
        // match), groups 2 and 3 match one stray byte each; as only group 1
        // is written back, the stray bytes vanish.
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($utf8_filter, '$1', $str);

        // the optional steps below always run in this order

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
910
911
    /**
     * Repair simple encoding errors in a string and then run it through UTF8::clean()
     * with a fixed set of options.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string (it is cast to string first).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fix ISO <-> UTF-8 errors first
        $repaired = self::fix_simple_utf8($input);

        // Arguments, in order of UTF8::clean()'s signature:
        //   $remove_bom                    = true
        //   $normalize_whitespace          = true
        //   $normalize_msword              = false
        //   $keep_non_breaking_space       = true
        //   $replace_diamond_question_mark = true
        // Invisible characters (e.g. "\0") are removed as well, because that
        // option is enabled by default in UTF8::clean().
        return self::clean($repaired, true, true, false, true, true);
    }
948
949
    /**
     * Turn a string (or an array of single characters) into an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If true, every code point is passed through
     *                                     UTF8::int_to_hex() (U+xxxx format); otherwise the
     *                                     code points are returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array is returned for empty or non-array input.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A string is split into characters; anything else is used as given.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);

        return $use_u_style
            ? \array_map([self::class, 'int_to_hex'], $code_points)
            : $code_points;
    }
1010
1011
    /**
     * Replace every run of "[[:space:]]" characters with a single space and trim the result.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        // Use mbstring's regex engine when it is flagged as available,
        // otherwise fall back to the class' own regex_replace().
        $collapsed = self::$SUPPORT['mbstring'] === true
            ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
            : self::regex_replace($str, '[[:space:]]+', ' ');

        return \trim($collapsed);
    }
1031
1032
    /**
     * Count how often each character occurs in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Passed through unchanged to UTF8::str_split().</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // split into chunks of length 1, then tally identical chunks
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
1061
1062
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * Adapted from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" strings applied before invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>If true, the string is passed through strip_tags().</p>
     * @param bool     $strtolower <p>If true, the string is passed through strtolower().</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Number of '__' occurrences that were swapped for the '##' placeholder
        // (filled by reference by str_replace() further down).
        $double_underscore_replacements = 0;

        // Fallback: a blank input gets a generated identifier, everything else is cleaned.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but it is much slower than str_replace().
        // To keep '__' as '__', it is first replaced by a placeholder, unless
        // '__' is itself defined as a filter key.
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $double_underscore_replacements);
        }

        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Restore the placeholder only if the input really contained '__'.
        if ($double_underscore_replacements > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Characters kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters from U+00A1 up to U+FFFF
        // Every other character is stripped.
        $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $str = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);

        return \trim($str, '-');
    }
1136
1137
    /**
     * Remove css media-queries.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input with every match of the "@media ... { ... }" pattern removed.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // Flags: case-insensitive (i), dot matches newlines (s), UTF-8 (u),
        // multi-line anchors (m) and inverted greediness (U).
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        return (string) \preg_replace($media_query_pattern, '', $str);
    }
1154
1155
    /**
     * Report whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1169
1170
    /**
     * Converts an int value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character, or an empty string if the conversion
     *                did not produce a string.</p>
     */
    public static function decimal_to_chr($int): string
    {
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // NOTE(review): the "HTML-ENTITIES" mbstring encoding is deprecated as of
        // PHP 8.2; this call should be revisited when the minimum PHP version allows.
        $chr = \mb_convert_encoding('&#' . $int . ';', 'UTF-8', 'HTML-ENTITIES');

        // mb_convert_encoding() is not guaranteed to return a string, and with
        // strict_types a non-string result would violate the declared return
        // type, so it is mapped to an empty string here.
        return \is_string($chr) ? $chr : '';
    }
1191
1192
    /**
     * Decodes a MIME header field.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // 'UTF-8' and 'CP850' are used as given, every other name is normalized first.
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // No extension check here: per the original note this always falls back
        // to the symfony polyfill when the native function is missing.
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1213
1214
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * The input is only checked for its shape (exactly two ASCII letters, any case);
     * it is not checked against the list of assigned country codes.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
     *
     * @return string
     *                <p>Emoji or empty string on error.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Exactly two ASCII letters are required. This also rejects the empty
        // string, wrong lengths, digits / punctuation and multibyte characters
        // (the latter must not reach the byte-wise indexing below).
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x41; // "A"

        // Map each letter A-Z onto its regional indicator symbol; the pair of
        // symbols forms the flag.
        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1242
1243
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the key table that matches the mapping mode; the emoji values
        // are the replacement in both modes.
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1286
1287
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // The emoji values are searched for in both modes; only the table of
        // replacement keys depends on the mapping mode.
        $replacement_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement_keys,
            $str
        );
    }
1330
1331
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true (or when $from_encoding is empty),
     *                                              the source encoding is detected from the string and a
     *                                              successful detection replaces $from_encoding. If the
     *                                              detection fails while this flag is true, the string is
     *                                              passed through UTF8::to_utf8() and returned.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to convert, or no target given
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are used as given, other names are normalized first
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // explicit source equals target: nothing to do
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: JSON, BASE64, HTML-ENTITIES ---
        // As a target they return immediately; as a source they are decoded
        // and the real source encoding is determined afterwards.

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // --- determine the source encoding ---

        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        if ($detected_encoding !== false) {
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // --- conversions handled by the class' own helpers ---

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion via mbstring, then iconv ---

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $str_encoded = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // a falsy result falls through to the iconv attempt below
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $converted = @\iconv($from_encoding, $to_encoding, $str);
        if ($converted !== false) {
            return $converted;
        }

        // last resort: hand back the (possibly decoded) input unchanged
        return $str;
    }
1506
1507
    /**
     * Encodes a string as a MIME header field value via iconv_mime_encode()
     * (the field name passed to iconv_mime_encode() is an empty string).
     *
     * @param string $str               <p>The value to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent            [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are used as given, every other name is normalized first.
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // No extension check here: per the original note this always falls back
        // to the symfony polyfill when the native function is missing.
        return \iconv_mime_encode('', $str, $preferences);
    }
1550
1551
    /**
     * Create an extract from a sentence; if the search-string is found, the extract
     * is built around it and skipped text is marked with the replacer.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // $encoding does not change below this point. For 'UTF-8' the native
        // mb_* functions are called directly, otherwise the class' own wrappers.
        $is_utf8 = $encoding === 'UTF-8';

        // characters stripped from the cut edges of the extract
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // ---------------------------------------------------------------
        // No search term: cut at a separator at or after position $length - 1.
        // ---------------------------------------------------------------
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $string_length);
            }

            if ($is_utf8) {
                $pos = (int) \min(
                    \mb_strpos($str, ' ', $end),
                    \mb_strpos($str, '.', $end)
                );
            } else {
                $pos = (int) \min(
                    self::strpos($str, ' ', $end, $encoding),
                    self::strpos($str, '.', $end, $encoding)
                );
            }

            if (!$pos) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos)
                : self::substr($str, 0, $pos, $encoding);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---------------------------------------------------------------
        // With search term: locate it and derive the start of the extract.
        // ---------------------------------------------------------------
        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // Start at the last space / dot found in the text before $half_side.
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $is_utf8
                ? \mb_substr($str, 0, $half_side)
                : self::substr($str, 0, $half_side, $encoding);

            if ($half_text !== false) {
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        // ---------------------------------------------------------------
        // Term not found (or found at position 0), or no room in front of it:
        // extract from the beginning of the string.
        // ---------------------------------------------------------------
        if (!($word_position && $half_side > 0)) {
            $offset = \min($length - 1, (int) self::strlen($str, $encoding));

            if ($is_utf8) {
                $pos_end = (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            } else {
                $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                );
            }

            if (!$pos_end) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos_end)
                : self::substr($str, 0, $pos_end, $encoding);

            return $str_sub !== false
                ? \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text
                : '';
        }

        // ---------------------------------------------------------------
        // Term found with text in front of it: extract from $pos_start.
        // ---------------------------------------------------------------
        $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));

        if ($is_utf8) {
            $pos_end = (int) \min(
                \mb_strpos($str, ' ', $offset),
                \mb_strpos($str, '.', $offset)
            ) - $pos_start;
        } else {
            $pos_end = (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            ) - $pos_start;
        }

        if ($pos_end <= 0) {
            // no usable end position: take everything from $pos_start on
            $str_sub = $is_utf8
                ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

            return $str_sub !== false
                ? $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars)
                : '';
        }

        $str_sub = $is_utf8
            ? \mb_substr($str, $pos_start, $pos_end)
            : self::substr($str, $pos_start, $pos_end, $encoding);

        return $str_sub !== false
            ? $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text
            : '';
    }
1740
1741
    /**
1742
     * Reads entire file into a string.
1743
     *
1744
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
1745
     *
1746
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1747
     *
1748
     * @see http://php.net/manual/en/function.file-get-contents.php
1749
     *
1750
     * @param string        $filename         <p>
1751
     *                                        Name of the file to read.
1752
     *                                        </p>
1753
     * @param bool          $use_include_path [optional] <p>
1754
     *                                        Prior to PHP 5, this parameter is called
1755
     *                                        use_include_path and is a bool.
1756
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1757
     *                                        to trigger include path
1758
     *                                        search.
1759
     *                                        </p>
1760
     * @param resource|null $context          [optional] <p>
1761
     *                                        A valid context resource created with
1762
     *                                        stream_context_create. If you don't need to use a
1763
     *                                        custom context, you can skip this parameter by &null;.
1764
     *                                        </p>
1765
     * @param int|null      $offset           [optional] <p>
1766
     *                                        The offset where the reading starts.
1767
     *                                        </p>
1768
     * @param int|null      $max_length       [optional] <p>
1769
     *                                        Maximum length of data read. The default is to read until end
1770
     *                                        of file is reached.
1771
     *                                        </p>
1772
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1773
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1774
     *                                        some files, because they used non default utf-8 chars. Binary files
1775
     *                                        like images or pdf will not be converted.</p>
1776
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1777
     *                                        A empty string will trigger the autodetect anyway.</p>
1778
     *
1779
     * @psalm-pure
1780
     *
1781
     * @return false|string
1782
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1783
     */
1784 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the given name first; a failed sanitization is reported
        // to the caller exactly like a failed read.
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // Build a default "http" context carrying the timeout, but only when
        // the caller did not bring a context of their own.
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        if ($max_length === null) {
            // no length limit requested: read until the end of the file
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        } else {
            // a negative length is clamped to zero before it is handed to PHP
            $data = \file_get_contents(
                $filename,
                $use_include_path,
                $context,
                $offset,
                $max_length < 0 ? 0 : $max_length
            );
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Data flagged as binary is left untouched, unless it is detected as
        // UTF-16 / UTF-32.
        $is_convertible = !self::is_binary($data, true)
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1845
1846
    /**
1847
     * Checks if a file starts with BOM (Byte Order Mark) character.
1848
     *
1849
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
1850
     *
1851
     * @param string $file_path <p>Path to a valid file.</p>
1852
     *
1853
     * @throws \RuntimeException if file_get_contents() returned false
1854
     *
1855
     * @return bool
1856
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1857
     *
1858
     * @psalm-pure
1859
     */
1860 2
    public static function file_has_bom(string $file_path): bool
    {
        // A BOM can only sit at the very beginning of a file, so it is enough to
        // read as many bytes as the longest entry of the BOM table needs instead
        // of loading the whole (possibly very large) file into memory.
        $max_bom_length = (int) \max(self::$BOM);

        $file_head = \file_get_contents($file_path, false, null, 0, $max_bom_length);
        if ($file_head === false) {
            throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
        }

        // NOTE(review): this relies on string_has_bom() only comparing the leading
        // bytes of its input against the BOM table - confirm against its implementation.
        return self::string_has_bom($file_head);
    }
1869
1870
    /**
1871
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1872
     *
1873
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
1874
     *
1875
     * @param array|object|string $var
1876
     * @param int                 $normalization_form
1877
     * @param string              $leading_combining
1878
     *
1879
     * @psalm-pure
1880
     *
1881
     * @return mixed
1882
     *
1883
     * @template TFilter
1884
     * @phpstan-param TFilter $var
1885
     * @phpstan-return TFilter
1886
     */
1887 64
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // Containers are filtered in place, one element after the other.
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // Pure ASCII needs neither normalization nor re-encoding.
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // truthy placeholder: the string is already in the requested form
                    $n = '-';
                } else {
                    $n = \Normalizer::normalize($var, $normalization_form);

                    // Use the normalized string when there is one, otherwise
                    // fall back to re-encoding the input as UTF-8.
                    $var = ($n && isset($n[0])) ? $n : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $n
                    && $var[0] >= "\x80"
                    && isset($n[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }
        // every other type is handed back unchanged

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1947
1948
    /**
1949
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1950
     *
1951
     * Gets a specific external variable by name and optionally filters it.
1952
     *
1953
     * EXAMPLE: <code>
1954
     * // _GET['foo'] = 'bar';
1955
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW)); // 'bar'
1956
     * </code>
1957
     *
1958
     * @see http://php.net/manual/en/function.filter-input.php
1959
     *
1960
     * @param int            $type          <p>
1961
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1962
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1963
     *                                      <b>INPUT_ENV</b>.
1964
     *                                      </p>
1965
     * @param string         $variable_name <p>
1966
     *                                      Name of a variable to get.
1967
     *                                      </p>
1968
     * @param int            $filter        [optional] <p>
1969
     *                                      The ID of the filter to apply. The
1970
     *                                      manual page lists the available filters.
1971
     *                                      </p>
1972
     * @param int|int[]|null $options       [optional] <p>
1973
     *                                      Associative array of options or bitwise disjunction of flags. If filter
1974
     *                                      accepts options, flags can be provided in "flags" field of array.
1975
     *                                      </p>
1976
     *
1977
     * @psalm-pure
1978
     *
1979
     * @return mixed
1980
     *               <p>
1981
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1982
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1983
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1984
     *               </p>
1985
     */
1986 1
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        // Only forward the options when the caller really supplied some,
        // otherwise let the native function use its own default.
        $var = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($var);
    }
2003
2004
    /**
2005
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2006
     *
2007
     * Gets external variables and optionally filters them.
2008
     *
2009
     * EXAMPLE: <code>
2010
     * // _GET['foo'] = 'bar';
2011
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar')
2012
     * </code>
2013
     *
2014
     * @see http://php.net/manual/en/function.filter-input-array.php
2015
     *
2016
     * @param int        $type       <p>
2017
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
2018
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
2019
     *                               <b>INPUT_ENV</b>.
2020
     *                               </p>
2021
     * @param array|null $definition [optional] <p>
2022
     *                               An array defining the arguments. A valid key is a string
2023
     *                               containing a variable name and a valid value is either a filter type, or an array
2024
     *                               optionally specifying the filter, flags and options. If the value is an
2025
     *                               array, valid keys are filter which specifies the
2026
     *                               filter type,
2027
     *                               flags which specifies any flags that apply to the
2028
     *                               filter, and options which specifies any options that
2029
     *                               apply to the filter. See the example below for a better understanding.
2030
     *                               </p>
2031
     *                               <p>
2032
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
2033
     *                               input array are filtered by this filter.
2034
     *                               </p>
2035
     * @param bool       $add_empty  [optional] <p>
2036
     *                               Add missing keys as <b>NULL</b> to the return value.
2037
     *                               </p>
2038
     *
2039
     * @psalm-pure
2040
     *
2041
     * @return mixed
2042
     *               <p>
2043
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2044
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2045
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
2046
     *               is not set and <b>NULL</b> if the filter fails.
2047
     *               </p>
2048
     */
2049 1
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        // Without a definition the native function is called with its own defaults.
        $a = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($a);
    }
2065
2066
    /**
2067
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2068
     *
2069
     * Filters a variable with a specified filter.
2070
     *
2071
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
2072
     *
2073
     * @see http://php.net/manual/en/function.filter-var.php
2074
     *
2075
     * @param float|int|string|null $variable <p>
2076
     *                                        Value to filter.
2077
     *                                        </p>
2078
     * @param int                   $filter   [optional] <p>
2079
     *                                        The ID of the filter to apply. The
2080
     *                                        manual page lists the available filters.
2081
     *                                        </p>
2082
     * @param int|int[]|null        $options  [optional] <p>
2083
     *                                        Associative array of options or bitwise disjunction of flags. If filter
2084
     *                                        accepts options, flags can be provided in "flags" field of array. For
2085
     *                                        the "callback" filter, callable type should be passed. The
2086
     *                                        callback must accept one argument, the value to be filtered, and return
2087
     *                                        the value after filtering/sanitizing it.
2088
     *                                        </p>
2089
     *                                        <p>
2090
     *                                        <code>
2091
     *                                        // for filters that accept options, use this format
2092
     *                                        $options = array(
2093
     *                                        'options' => array(
2094
     *                                        'default' => 3, // value to return if the filter fails
2095
     *                                        // other options here
2096
     *                                        'min_range' => 0
2097
     *                                        ),
2098
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
2099
     *                                        );
2100
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
2101
     *                                        // for filter that only accept flags, you can pass them directly
2102
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
2103
     *                                        // for filter that only accept flags, you can also pass as an array
2104
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
2105
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
2106
     *                                        // callback validate filter
2107
     *                                        function foo($value)
2108
     *                                        {
2109
     *                                        // Expected format: Surname, GivenNames
2110
     *                                        if (strpos($value, ", ") === false) return false;
2111
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
2112
     *                                        $empty = (empty($surname) || empty($givennames));
2113
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
2114
     *                                        if ($empty || $notstrings) {
2115
     *                                        return false;
2116
     *                                        } else {
2117
     *                                        return $value;
2118
     *                                        }
2119
     *                                        }
2120
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
2121
     *                                        </code>
2122
     *                                        </p>
2123
     *
2124
     * @psalm-pure
2125
     *
2126
     * @return mixed
2127
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
2128
     */
2129 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The native filter_var() declares $options as "array|int" (default: 0).
        // An explicitly passed null must therefore never be forwarded: with
        // strict_types enabled that raises a TypeError on PHP 8. "No options"
        // (omitted or null) is mapped onto the native default instead, which
        // keeps the behavior of the two-argument call unchanged.
        $variable = \filter_var($variable, $filter, $options ?? 0);

        // normalize the filtered result (UTF-8 NFC)
        return self::filter($variable);
    }
2145
2146
    /**
2147
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2148
     *
2149
     * Gets multiple variables and optionally filters them.
2150
     *
2151
     * EXAMPLE: <code>
2152
     * $filters = [
2153
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2154
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2155
     *     'email' => FILTER_VALIDATE_EMAIL,
2156
     * ];
2157
     *
2158
     * $data = [
2159
     *     'name' => 'κόσμε',
2160
     *     'age' => '18',
2161
     *     'email' => '[email protected]'
2162
     * ];
2163
     *
2164
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => '[email protected]']
2165
     * </code>
2166
     *
2167
     * @see http://php.net/manual/en/function.filter-var-array.php
2168
     *
2169
     * @param array<mixed>   $data       <p>
2170
     *                                   An array with string keys containing the data to filter.
2171
     *                                   </p>
2172
     * @param array|int|null $definition [optional] <p>
2173
     *                                   An array defining the arguments. A valid key is a string
2174
     *                                   containing a variable name and a valid value is either a
2175
     *                                   filter type, or an
2176
     *                                   array optionally specifying the filter, flags and options.
2177
     *                                   If the value is an array, valid keys are filter
2178
     *                                   which specifies the filter type,
2179
     *                                   flags which specifies any flags that apply to the
2180
     *                                   filter, and options which specifies any options that
2181
     *                                   apply to the filter. See the example below for a better understanding.
2182
     *                                   </p>
2183
     *                                   <p>
2184
     *                                   This parameter can be also an integer holding a filter constant. Then all values
2185
     *                                   in the input array are filtered by this filter.
2186
     *                                   </p>
2187
     * @param bool           $add_empty  [optional] <p>
2188
     *                                   Add missing keys as <b>NULL</b> to the return value.
2189
     *                                   </p>
2190
     *
2191
     * @psalm-pure
2192
     *
2193
     * @return mixed
2194
     *               <p>
2195
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2196
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2197
     *               set.
2198
     *               </p>
2199
     */
2200 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // The native filter_var_array() declares its second parameter as
        // "array|int" (default: FILTER_DEFAULT). Forwarding an explicit null is
        // rejected by PHP (TypeError under strict_types on PHP 8), so "no
        // definition" (omitted or null) is mapped onto the native default.
        // With the defaults of this method that is identical to calling
        // \filter_var_array($data) with a single argument.
        $a = \filter_var_array($data, $definition ?? \FILTER_DEFAULT, $add_empty);

        // normalize every filtered value (UTF-8 NFC)
        return self::filter($a);
    }
2216
2217
    /**
2218
     * Checks whether finfo is available on the server.
2219
     *
2220
     * @psalm-pure
2221
     *
2222
     * @return bool
2223
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2224
     *
2225
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2226
     */
2227
    public static function finfo_loaded(): bool
    {
        // "\finfo::class" is resolved at compile time to the plain string 'finfo'
        $finfo_class_name = \finfo::class;

        return \class_exists($finfo_class_name);
    }
2231
2232
    /**
2233
     * Returns the first $n characters of the string.
2234
     *
2235
     * @param string $str      <p>The input string.</p>
2236
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2237
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2238
     *
2239
     * @psalm-pure
2240
     *
2241
     * @return string
2242
     */
2243 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to take from an empty string or for a non-positive count
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other encoding uses self::substr()
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2258
2259
    /**
2260
     * Check if the number of Unicode characters isn't greater than the specified integer.
2261
     *
2262
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // false</code>
2263
     *
2264
     * @param string $str      the original string to be checked
2265
     * @param int    $box_size the size in number of chars to be checked against string
2266
     *
2267
     * @psalm-pure
2268
     *
2269
     * @return bool
2270
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2271
     */
2272 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // number of characters as reported by self::strlen(), cast to int
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2276
2277
    /**
2278
     * Try to fix simple broken UTF-8 strings.
2279
     *
2280
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2281
     *
2282
     * EXAMPLE: <code>UTF8::fix_simple_utf8('Düsseldorf'); // 'Düsseldorf'</code>
2283
     *
2284
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2285
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2286
     * See: http://en.wikipedia.org/wiki/Windows-1252
2287
     *
2288
     * @param string $str <p>The input string</p>
2289
     *
2290
     * @psalm-pure
2291
     *
2292
     * @return string
2293
     */
2294 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Search strings (broken sequences), filled on the first call.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * Replacement table (broken sequence => fixed sequence), filled on the first call.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        if ($search_cache === null) {
            // load the fix-table once per class ...
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // ... and remember its keys / values for all following calls
            $replace_cache = self::$BROKEN_UTF8_FIX;
            $search_cache = \array_keys($replace_cache ?: []);
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }
2327
2328
    /**
2329
     * Fix a double (or multiple) encoded UTF8 string.
2330
     *
2331
     * EXAMPLE: <code>UTF8::fix_utf8('Fédération'); // 'Fédération'</code>
2332
     *
2333
     * @param string|string[] $str you can use a string or an array of strings
2334
     *
2335
     * @psalm-pure
2336
     *
2337
     * @return string|string[]
2338
     *                         <p>Will return the fixed input-"array" or
2339
     *                         the fixed input-"string".</p>
2340
     *
2341
     * @template TFixUtf8
2342
     * @phpstan-param TFixUtf8 $str
2343
     * @phpstan-return TFixUtf8
2344
     */
2345 2
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // fix every entry on its own, the keys stay as they are
            foreach (\array_keys($str) as $key) {
                $str[$key] = self::fix_utf8($str[$key]);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Decode and re-encode until a pass no longer changes the string.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2376
2377
    /**
2378
     * Get character of a specific character.
2379
     *
2380
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2381
     *
2382
     * @param string $char
2383
     *
2384
     * @psalm-pure
2385
     *
2386
     * @return string
2387
     *                <p>'RTL' or 'LTR'.</p>
2388
     */
2389 2
    public static function getCharDirection(string $char): string
2390
    {
2391 2
        if (self::$SUPPORT['intlChar'] === true) {
2392 2
            $tmp_return = \IntlChar::charDirection($char);
2393
2394
            // from "IntlChar"-Class
2395
            $char_direction = [
2396 2
                'RTL' => [1, 13, 14, 15, 21],
2397
                'LTR' => [0, 11, 12, 20],
2398
            ];
2399
2400 2
            if (\in_array($tmp_return, $char_direction['LTR'], true)) {
2401
                return 'LTR';
2402
            }
2403
2404 2
            if (\in_array($tmp_return, $char_direction['RTL'], true)) {
2405 2
                return 'RTL';
2406
            }
2407
        }
2408
2409 2
        $c = static::chr_to_decimal($char);
2410
2411 2
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
2412 2
            return 'LTR';
2413
        }
2414
2415 2
        if ($c <= 0x85e) {
2416 2
            if ($c === 0x5be ||
2417 2
                $c === 0x5c0 ||
2418 2
                $c === 0x5c3 ||
2419 2
                $c === 0x5c6 ||
2420 2
                ($c >= 0x5d0 && $c <= 0x5ea) ||
2421 2
                ($c >= 0x5f0 && $c <= 0x5f4) ||
2422 2
                $c === 0x608 ||
2423 2
                $c === 0x60b ||
2424 2
                $c === 0x60d ||
2425 2
                $c === 0x61b ||
2426 2
                ($c >= 0x61e && $c <= 0x64a) ||
2427
                ($c >= 0x66d && $c <= 0x66f) ||
2428
                ($c >= 0x671 && $c <= 0x6d5) ||
2429
                ($c >= 0x6e5 && $c <= 0x6e6) ||
2430
                ($c >= 0x6ee && $c <= 0x6ef) ||
2431
                ($c >= 0x6fa && $c <= 0x70d) ||
2432
                $c === 0x710 ||
2433
                ($c >= 0x712 && $c <= 0x72f) ||
2434
                ($c >= 0x74d && $c <= 0x7a5) ||
2435
                $c === 0x7b1 ||
2436
                ($c >= 0x7c0 && $c <= 0x7ea) ||
2437
                ($c >= 0x7f4 && $c <= 0x7f5) ||
2438
                $c === 0x7fa ||
2439
                ($c >= 0x800 && $c <= 0x815) ||
2440
                $c === 0x81a ||
2441
                $c === 0x824 ||
2442
                $c === 0x828 ||
2443
                ($c >= 0x830 && $c <= 0x83e) ||
2444
                ($c >= 0x840 && $c <= 0x858) ||
2445 2
                $c === 0x85e
2446
            ) {
2447 2
                return 'RTL';
2448
            }
2449 2
        } elseif ($c === 0x200f) {
2450
            return 'RTL';
2451 2
        } elseif ($c >= 0xfb1d) {
2452 2
            if ($c === 0xfb1d ||
2453 2
                ($c >= 0xfb1f && $c <= 0xfb28) ||
2454 2
                ($c >= 0xfb2a && $c <= 0xfb36) ||
2455 2
                ($c >= 0xfb38 && $c <= 0xfb3c) ||
2456 2
                $c === 0xfb3e ||
2457 2
                ($c >= 0xfb40 && $c <= 0xfb41) ||
2458 2
                ($c >= 0xfb43 && $c <= 0xfb44) ||
2459 2
                ($c >= 0xfb46 && $c <= 0xfbc1) ||
2460 2
                ($c >= 0xfbd3 && $c <= 0xfd3d) ||
2461 2
                ($c >= 0xfd50 && $c <= 0xfd8f) ||
2462 2
                ($c >= 0xfd92 && $c <= 0xfdc7) ||
2463 2
                ($c >= 0xfdf0 && $c <= 0xfdfc) ||
2464 2
                ($c >= 0xfe70 && $c <= 0xfe74) ||
2465 2
                ($c >= 0xfe76 && $c <= 0xfefc) ||
2466 2
                ($c >= 0x10800 && $c <= 0x10805) ||
2467 2
                $c === 0x10808 ||
2468 2
                ($c >= 0x1080a && $c <= 0x10835) ||
2469 2
                ($c >= 0x10837 && $c <= 0x10838) ||
2470 2
                $c === 0x1083c ||
2471 2
                ($c >= 0x1083f && $c <= 0x10855) ||
2472 2
                ($c >= 0x10857 && $c <= 0x1085f) ||
2473 2
                ($c >= 0x10900 && $c <= 0x1091b) ||
2474 2
                ($c >= 0x10920 && $c <= 0x10939) ||
2475 2
                $c === 0x1093f ||
2476 2
                $c === 0x10a00 ||
2477 2
                ($c >= 0x10a10 && $c <= 0x10a13) ||
2478 2
                ($c >= 0x10a15 && $c <= 0x10a17) ||
2479 2
                ($c >= 0x10a19 && $c <= 0x10a33) ||
2480 2
                ($c >= 0x10a40 && $c <= 0x10a47) ||
2481 2
                ($c >= 0x10a50 && $c <= 0x10a58) ||
2482 2
                ($c >= 0x10a60 && $c <= 0x10a7f) ||
2483 2
                ($c >= 0x10b00 && $c <= 0x10b35) ||
2484 2
                ($c >= 0x10b40 && $c <= 0x10b55) ||
2485 2
                ($c >= 0x10b58 && $c <= 0x10b72) ||
2486 2
                ($c >= 0x10b78)
2487
            ) {
2488 2
                return 'RTL';
2489
            }
2490
        }
2491
2492 2
        return 'LTR';
2493
    }
2494
2495
    /**
     * Check for php-support.
     *
     * @param string|null $key <p>Name of a single support entry, or null for everything.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // the transliterator list is only loaded once a single key is requested
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2521
2522
    /**
     * Guess the file type from the first two bytes of the given data.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        /** @var false|string $magic - needed for PhpStan (stubs error) */
        $magic = \substr($str, 0, 2);
        if ($magic === false || \strlen($magic) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $magic);
        if ($bytes === false) {
            return $fallback;
        }

        // both byte values written as decimals and glued together, e.g. 255 + 216 => 255216
        $signature = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_signatures = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_signatures[$signature] ?? $fallback;
    }
2598
2599
    /**
     * Build a random string out of the given characters.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // plain "mb_*" calls are used for UTF-8, the class helpers for every other encoding
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $is_utf8
            ? (int) \mb_strlen($possible_chars)
            : (int) self::strlen($possible_chars, $encoding);
        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $offset = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                // no secure random source available: use the weaker generator instead
                $offset = \mt_rand(0, $pool_size - 1);
            }

            $piece = $is_utf8
                ? \mb_substr($possible_chars, $offset, 1)
                : self::substr($possible_chars, $offset, 1, $encoding);

            // a failed extraction does not count, another character is drawn instead
            if ($piece === false) {
                continue;
            }

            $result .= $piece;
            ++$picked;
        }

        return $result;
    }
2661
2662
    /**
     * Build a unique identifier via "uniqid()", seeded with a random number,
     * the session-id, the remote / server address and the extra entropy.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // no secure random source available: use the weaker generator instead
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        $seed = \implode(
            '',
            [
                $random_part,
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $extra_entropy,
            ]
        );

        $unique = \uniqid($seed, true);

        return $use_md5 ? \md5($unique . $seed) : $unique;
    }
2690
2691
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        // leading ".*" so that the character may appear anywhere in the string
        $pattern = '.*[[:lower:]]';

        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
2709
2710
    /**
     * Returns true if the string contains whitespace, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        // leading ".*" so that the character may appear anywhere in the string
        $pattern = '.*[[:space:]]';

        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
2728
2729
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        // leading ".*" so that the character may appear anywhere in the string
        $pattern = '.*[[:upper:]]';

        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
2747
2748
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // non-hex characters (like the "U+" prefix) are ignored by hexdec(), "@" hides the message about them
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2766
2767
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted input: 4 to 6 hexadecimal digits, optionally prefixed by "\u" or "U+".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are allowed in the capture group: intval() stops
        // silently at the first non-hex character, so e.g. "zzzz" would become 0
        // instead of being reported as a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2796
2797
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "keep ascii" only code points from 0x80 upwards are converted
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // The encoding is always passed explicitly, so that the result does not
            // depend on the global "mb_internal_encoding()" setting.
            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity(
                $str,
                [$start_code, 0xfffff, 0, 0xfffff],
                $encoding
            );
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2869
2870
    /**
2871
     * UTF-8 version of html_entity_decode()
2872
     *
2873
     * The reason we are not using html_entity_decode() by itself is because
2874
     * while it is not technically correct to leave out the semicolon
2875
     * at the end of an entity most browsers will still interpret the entity
2876
     * correctly. html_entity_decode() does not convert entities without
2877
     * semicolons, so we are left with our own little solution here. Bummer.
2878
     *
2879
     * Convert all HTML entities to their applicable characters.
2880
     *
2881
     * INFO: opposite to UTF8::html_encode()
2882
     *
2883
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2884
     *
2885
     * @see http://php.net/manual/en/function.html-entity-decode.php
2886
     *
2887
     * @param string   $str      <p>
2888
     *                           The input string.
2889
     *                           </p>
2890
     * @param int|null $flags    [optional] <p>
2891
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2892
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2893
     *                           <table>
2894
     *                           Available <i>flags</i> constants
2895
     *                           <tr valign="top">
2896
     *                           <td>Constant Name</td>
2897
     *                           <td>Description</td>
2898
     *                           </tr>
2899
     *                           <tr valign="top">
2900
     *                           <td><b>ENT_COMPAT</b></td>
2901
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2902
     *                           </tr>
2903
     *                           <tr valign="top">
2904
     *                           <td><b>ENT_QUOTES</b></td>
2905
     *                           <td>Will convert both double and single quotes.</td>
2906
     *                           </tr>
2907
     *                           <tr valign="top">
2908
     *                           <td><b>ENT_NOQUOTES</b></td>
2909
     *                           <td>Will leave both double and single quotes unconverted.</td>
2910
     *                           </tr>
2911
     *                           <tr valign="top">
2912
     *                           <td><b>ENT_HTML401</b></td>
2913
     *                           <td>
2914
     *                           Handle code as HTML 4.01.
2915
     *                           </td>
2916
     *                           </tr>
2917
     *                           <tr valign="top">
2918
     *                           <td><b>ENT_XML1</b></td>
2919
     *                           <td>
2920
     *                           Handle code as XML 1.
2921
     *                           </td>
2922
     *                           </tr>
2923
     *                           <tr valign="top">
2924
     *                           <td><b>ENT_XHTML</b></td>
2925
     *                           <td>
2926
     *                           Handle code as XHTML.
2927
     *                           </td>
2928
     *                           </tr>
2929
     *                           <tr valign="top">
2930
     *                           <td><b>ENT_HTML5</b></td>
2931
     *                           <td>
2932
     *                           Handle code as HTML 5.
2933
     *                           </td>
2934
     *                           </tr>
2935
     *                           </table>
2936
     *                           </p>
2937
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2938
     *
2939
     * @psalm-pure
2940
     *
2941
     * @return string the decoded string
2942
     */
2943 34
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to decode: shorter than four bytes (examples: &; || &x;) or no "&" at all
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        $is_common_encoding = $encoding === 'UTF-8'
                              ||
                              $encoding === 'ISO-8859-1'
                              ||
                              $encoding === 'WINDOWS-1252';

        if (!$is_common_encoding && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again until nothing changes anymore (handles nested entities like "&amp;lt;")
        do {
            $before = $str;

            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // append the missing ";" to numeric entities, so that they are decoded as well
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($before !== $str);

        return $str;
    }
3002
3003
    /**
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
     *
     * @param string $str      <p>The string to escape.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // convert both quote styles and substitute invalid sequences
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3021
3022
    /**
     * Remove empty html-tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * If the replacement fails (e.g. because the input is not valid UTF-8),
     * the input is returned unchanged.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // an opening tag, optional whitespace and a closing tag
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on failure: do not wipe the whole string in that case
        return $stripped ?? $str;
    }
3041
3042
    /**
3043
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3044
     *
3045
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3046
     *
3047
     * @see http://php.net/manual/en/function.htmlentities.php
3048
     *
3049
     * @param string $str           <p>
3050
     *                              The input string.
3051
     *                              </p>
3052
     * @param int    $flags         [optional] <p>
3053
     *                              A bitmask of one or more of the following flags, which specify how to handle
3054
     *                              quotes, invalid code unit sequences and the used document type. The default is
3055
     *                              ENT_COMPAT | ENT_HTML401.
3056
     *                              <table>
3057
     *                              Available <i>flags</i> constants
3058
     *                              <tr valign="top">
3059
     *                              <td>Constant Name</td>
3060
     *                              <td>Description</td>
3061
     *                              </tr>
3062
     *                              <tr valign="top">
3063
     *                              <td><b>ENT_COMPAT</b></td>
3064
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3065
     *                              </tr>
3066
     *                              <tr valign="top">
3067
     *                              <td><b>ENT_QUOTES</b></td>
3068
     *                              <td>Will convert both double and single quotes.</td>
3069
     *                              </tr>
3070
     *                              <tr valign="top">
3071
     *                              <td><b>ENT_NOQUOTES</b></td>
3072
     *                              <td>Will leave both double and single quotes unconverted.</td>
3073
     *                              </tr>
3074
     *                              <tr valign="top">
3075
     *                              <td><b>ENT_IGNORE</b></td>
3076
     *                              <td>
3077
     *                              Silently discard invalid code unit sequences instead of returning
3078
     *                              an empty string. Using this flag is discouraged as it
3079
     *                              may have security implications.
3080
     *                              </td>
3081
     *                              </tr>
3082
     *                              <tr valign="top">
3083
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3084
     *                              <td>
3085
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3086
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3087
     *                              string.
3088
     *                              </td>
3089
     *                              </tr>
3090
     *                              <tr valign="top">
3091
     *                              <td><b>ENT_DISALLOWED</b></td>
3092
     *                              <td>
3093
     *                              Replace invalid code points for the given document type with a
3094
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3095
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3096
     *                              instance, to ensure the well-formedness of XML documents with
3097
     *                              embedded external content.
3098
     *                              </td>
3099
     *                              </tr>
3100
     *                              <tr valign="top">
3101
     *                              <td><b>ENT_HTML401</b></td>
3102
     *                              <td>
3103
     *                              Handle code as HTML 4.01.
3104
     *                              </td>
3105
     *                              </tr>
3106
     *                              <tr valign="top">
3107
     *                              <td><b>ENT_XML1</b></td>
3108
     *                              <td>
3109
     *                              Handle code as XML 1.
3110
     *                              </td>
3111
     *                              </tr>
3112
     *                              <tr valign="top">
3113
     *                              <td><b>ENT_XHTML</b></td>
3114
     *                              <td>
3115
     *                              Handle code as XHTML.
3116
     *                              </td>
3117
     *                              </tr>
3118
     *                              <tr valign="top">
3119
     *                              <td><b>ENT_HTML5</b></td>
3120
     *                              <td>
3121
     *                              Handle code as HTML 5.
3122
     *                              </td>
3123
     *                              </tr>
3124
     *                              </table>
3125
     *                              </p>
3126
     * @param string $encoding      [optional] <p>
3127
     *                              Like <b>htmlspecialchars</b>,
3128
     *                              <b>htmlentities</b> takes an optional third argument
3129
     *                              <i>encoding</i> which defines encoding used in
3130
     *                              conversion.
3131
     *                              Although this argument is technically optional, you are highly
3132
     *                              encouraged to specify the correct value for your code.
3133
     *                              </p>
3134
     * @param bool   $double_encode [optional] <p>
3135
     *                              When <i>double_encode</i> is turned off PHP will not
3136
     *                              encode existing html entities. The default is to convert everything.
3137
     *                              </p>
3138
     *
3139
     * @psalm-pure
3140
     *
3141
     * @return string
3142
     *                <p>
3143
     *                The encoded string.
3144
     *                <br><br>
3145
     *                If the input <i>string</i> contains an invalid code unit
3146
     *                sequence within the given <i>encoding</i> an empty string
3147
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3148
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3149
     *                </p>
3150
     */
3151 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP doesn't replace a backslash to its html entity since this is something
         * that's mostly used to escape characters when inserting in a database.
         * A decent database layer makes that unnecessary, so the backslash is
         * replaced by its html entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // finally convert all remaining non-ASCII chars into numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
3180
3181
    /**
3182
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3183
     *
3184
     * INFO: Take a look at "UTF8::htmlentities()"
3185
     *
3186
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3187
     *
3188
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3189
     *
3190
     * @param string $str           <p>
3191
     *                              The string being converted.
3192
     *                              </p>
3193
     * @param int    $flags         [optional] <p>
3194
     *                              A bitmask of one or more of the following flags, which specify how to handle
3195
     *                              quotes, invalid code unit sequences and the used document type. The default is
3196
     *                              ENT_COMPAT | ENT_HTML401.
3197
     *                              <table>
3198
     *                              Available <i>flags</i> constants
3199
     *                              <tr valign="top">
3200
     *                              <td>Constant Name</td>
3201
     *                              <td>Description</td>
3202
     *                              </tr>
3203
     *                              <tr valign="top">
3204
     *                              <td><b>ENT_COMPAT</b></td>
3205
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3206
     *                              </tr>
3207
     *                              <tr valign="top">
3208
     *                              <td><b>ENT_QUOTES</b></td>
3209
     *                              <td>Will convert both double and single quotes.</td>
3210
     *                              </tr>
3211
     *                              <tr valign="top">
3212
     *                              <td><b>ENT_NOQUOTES</b></td>
3213
     *                              <td>Will leave both double and single quotes unconverted.</td>
3214
     *                              </tr>
3215
     *                              <tr valign="top">
3216
     *                              <td><b>ENT_IGNORE</b></td>
3217
     *                              <td>
3218
     *                              Silently discard invalid code unit sequences instead of returning
3219
     *                              an empty string. Using this flag is discouraged as it
3220
     *                              may have security implications.
3221
     *                              </td>
3222
     *                              </tr>
3223
     *                              <tr valign="top">
3224
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3225
     *                              <td>
3226
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3227
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3228
     *                              string.
3229
     *                              </td>
3230
     *                              </tr>
3231
     *                              <tr valign="top">
3232
     *                              <td><b>ENT_DISALLOWED</b></td>
3233
     *                              <td>
3234
     *                              Replace invalid code points for the given document type with a
3235
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3236
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3237
     *                              instance, to ensure the well-formedness of XML documents with
3238
     *                              embedded external content.
3239
     *                              </td>
3240
     *                              </tr>
3241
     *                              <tr valign="top">
3242
     *                              <td><b>ENT_HTML401</b></td>
3243
     *                              <td>
3244
     *                              Handle code as HTML 4.01.
3245
     *                              </td>
3246
     *                              </tr>
3247
     *                              <tr valign="top">
3248
     *                              <td><b>ENT_XML1</b></td>
3249
     *                              <td>
3250
     *                              Handle code as XML 1.
3251
     *                              </td>
3252
     *                              </tr>
3253
     *                              <tr valign="top">
3254
     *                              <td><b>ENT_XHTML</b></td>
3255
     *                              <td>
3256
     *                              Handle code as XHTML.
3257
     *                              </td>
3258
     *                              </tr>
3259
     *                              <tr valign="top">
3260
     *                              <td><b>ENT_HTML5</b></td>
3261
     *                              <td>
3262
     *                              Handle code as HTML 5.
3263
     *                              </td>
3264
     *                              </tr>
3265
     *                              </table>
3266
     *                              </p>
3267
     * @param string $encoding      [optional] <p>
3268
     *                              Defines encoding used in conversion.
3269
     *                              </p>
3270
     *                              <p>
3271
     *                              For the purposes of this function, the encodings
3272
     *                              ISO-8859-1, ISO-8859-15,
3273
     *                              UTF-8, cp866,
3274
     *                              cp1251, cp1252, and
3275
     *                              KOI8-R are effectively equivalent, provided the
3276
     *                              <i>string</i> itself is valid for the encoding, as
3277
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3278
     *                              the same positions in all of these encodings.
3279
     *                              </p>
3280
     * @param bool   $double_encode [optional] <p>
3281
     *                              When <i>double_encode</i> is turned off PHP will not
3282
     *                              encode existing html entities, the default is to convert everything.
3283
     *                              </p>
3284
     *
3285
     * @psalm-pure
3286
     *
3287
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
3294
     */
3295 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; every other encoding name is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        $target_encoding = $is_known_encoding
            ? $encoding
            : self::normalize_encoding($encoding, 'UTF-8');

        return \htmlspecialchars($str, $flags, $target_encoding, $double_encode);
    }
3312
3313
    /**
     * Tells whether the "iconv" extension is loaded in the running PHP process.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3327
3328
    /**
     * Converts an integer to its hexadecimal "U+xxxx" code point representation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to a hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text placed in front of the hex digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the hex digits produced by dechex(), left-padded with
     *                zeros to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Values with four or more hex digits pass through unchanged; shorter ones are zero-padded.
        $padded_hex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $padded_hex;
    }
3350
3351
    /**
     * Tells whether the "IntlChar" class is available in the running PHP process.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the class exists, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3365
3366
    /**
     * Tells whether the "intl" extension is loaded in the running PHP process.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3380
3381
    /**
     * Checks whether the string consists solely of alphabetic chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $only_alpha_pattern = '^[[:alpha:]]*$';

        // Use mbstring's regex engine when it is flagged as supported, otherwise the class' own matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_alpha_pattern, $str)
            : self::str_matches_pattern($str, $only_alpha_pattern);
    }
3399
3400
    /**
     * Checks whether the string consists solely of alphabetic and numeric chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $only_alnum_pattern = '^[[:alnum:]]*$';

        // Use mbstring's regex engine when it is flagged as supported, otherwise the class' own matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_alnum_pattern, $str)
            : self::str_matches_pattern($str, $only_alnum_pattern);
    }
3418
3419
    /**
     * Checks whether the string consists solely of punctuation chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $only_punctuation_pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $only_punctuation_pattern);
    }
3433
3434
    /**
     * Checks whether the string contains only printable (non-invisible) chars.
     *
     * The string counts as printable when UTF8::remove_invisible_characters() returns it unchanged.
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        $without_invisible_chars = self::remove_invisible_characters(
            $str,
            false,
            '',
            $ignore_control_characters
        );

        return $without_invisible_chars === $str;
    }
3449
3450
    /**
     * Checks if a string is 7 bit ASCII (delegates to ASCII::is_ascii()).
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3469
3470
    /**
     * Checks whether the input is a base64 encoded string.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) can never be base64.
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Strict decoding alone is not enough: re-encoding has to reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3501
3502
    /**
     * Check if the input is binary... (is look like a hack).
     *
     * The checks run in this order: a string made only of "0" / "1" chars, the type reported by
     * UTF8::get_file_type(), and then either the share of NULL bytes (non-strict) or the
     * MIME encoding reported by ext-fileinfo (strict).
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input
     * @param bool       $strict <p>Use ext-fileinfo instead of the NULL byte ratio for the final check.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if $strict is used while the "finfo" support flag is false
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        $file_type = self::get_file_type($data);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if (!$strict) {
            // Non-strict mode: binary when more than a quarter of all bytes are NULL bytes.
            $byte_count = \strlen($data);
            $null_byte_count = \substr_count($data, "\x0", 0, $byte_count);

            return ($null_byte_count / $byte_count) > 0.25;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $mime_encoding === 'binary';
    }
3554
3555
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed to UTF8::is_binary() in strict mode.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file
     *
     * @return bool
     *              <p><strong>false</strong> is also returned if the file can not be opened or nothing can be read.</p>
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === false || $head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3581
3582
    /**
     * Checks whether the string consists solely of whitespace chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $only_space_pattern = '^[[:space:]]*$';

        // Use mbstring's regex engine when it is flagged as supported, otherwise the class' own matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_space_pattern, $str)
            : self::str_matches_pattern($str, $only_space_pattern);
    }
3600
3601
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if $str is exactly one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise (also for non-string input).</p>
     */
    public static function is_bom($str): bool
    {
        // The BOM strings are the keys of the lookup table, so a key lookup replaces
        // the former by-reference loop over the shared static array. The is_string()
        // guard keeps the old strict-comparison result for non-string input.
        return \is_string($str) && \array_key_exists($str, self::$BOM);
    }
3626
3627
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it equals FALSE when converted to bool
     * (e.g. '', '0', 0, 0.0 or an empty array).
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // For a parameter (which always exists) empty() is the same as a negated bool conversion.
        return !$str;
    }
3644
3645
    /**
     * Checks whether the string consists solely of hexadecimal chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $only_xdigit_pattern = '^[[:xdigit:]]*$';

        // Use mbstring's regex engine when it is flagged as supported, otherwise the class' own matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_xdigit_pattern, $str)
            : self::str_matches_pattern($str, $only_xdigit_pattern);
    }
3663
3664
    /**
     * Check if the string contains any HTML tags.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $emoji_encoded = self::emoji_encode($str);

        // Opening, closing or self-closing tag, optionally with (quoted or unquoted) attributes.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        return \preg_match($tag_pattern, $emoji_encoded) === 1;
    }
3691
3692
    /**
     * Check if $url is a correct url.
     *
     * Only "http://" and "https://" urls are accepted. A url is valid when it either matches the
     * (internationalized) host pattern below or passes PHP's FILTER_VALIDATE_URL.
     *
     * @param string $url
     * @param bool   $disallow_localhost <p>Reject urls that point to localhost, 127.0.0.1, ::1 / [::1]
     *                                   or contain ".localhost".</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals are written in brackets inside a url, and that form is
                    // accepted by the FILTER_VALIDATE_URL fallback below, so it has to be
                    // rejected here as well.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3743
3744
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if the "json" support flag is false
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only accepted when the input itself is the literal "null" (in any letter case).
        $is_null_literal = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$is_null_literal) {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_array_or_object = \is_object($decoded) || \is_array($decoded);
            if (!$is_array_or_object) {
                return false;
            }
        }

        return \json_last_error() === \JSON_ERROR_NONE;
    }
3783
3784
    /**
     * Checks whether the string consists solely of lowercase chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $only_lower_pattern = '^[[:lower:]]*$';

        // Use mbstring's regex engine when it is flagged as supported, otherwise the class' own matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_lower_pattern, $str)
            : self::str_matches_pattern($str, $only_lower_pattern);
    }
3800
3801
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(); class instantiation is disabled for that probe,
     * so no object is created (and no magic method is run) for serialized objects.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of false and can not be told apart from
        // an unserialize() failure, so it is handled explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        // - "allowed_classes => false": an empty options array would still allow every class,
        //   which means untrusted input could instantiate arbitrary objects.
        // - "@": unserialize() reports invalid input via a notice / warning, which is the
        //   expected "not serialized" case here.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3823
3824
    /**
     * Checks whether the string consists solely of upper case chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $only_upper_pattern = '^[[:upper:]]*$';

        // Use mbstring's regex engine when it is flagged as supported, otherwise the class' own matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_upper_pattern, $str)
            : self::str_matches_pattern($str, $only_upper_pattern);
    }
3843
3844
    /**
     * Check if the string is UTF-16.
     *
     * The (BOM-free) input is converted UTF-16LE -> UTF-8 -> UTF-16LE -> UTF-8 and the same for
     * UTF-16BE. For every round trip that is stable, the chars of the converted text are compared
     * against the char statistics of the input and counted as a score; the byte order with the
     * higher score wins, equal scores mean "not UTF-16".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>Return false early if the input is not detected as binary
     *                                          (skipped when the input has a BOM).</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // One pass per byte order instead of two copies of the same code.
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // Computed lazily and shared between both byte orders.
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so iterate over them by value; iterating
            // by reference over a function result has no target to reference.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$encoding];
                }
            }
        }

        $maybe_utf16le = $scores['UTF-16LE'];
        $maybe_utf16be = $scores['UTF-16BE'];

        if ($maybe_utf16be !== $maybe_utf16le) {
            return $maybe_utf16le > $maybe_utf16be ? 1 : 2;
        }

        return false;
    }
3942
3943
    /**
3944
     * Check if the string is UTF-32.
3945
     *
3946
     * EXAMPLE: <code>
3947
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
3948
     * //
3949
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
3950
     * //
3951
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
3952
     * </code>
3953
     *
3954
     * @param string $str                       <p>The input string.</p>
3955
     * @param bool   $check_if_string_is_binary
3956
     *
3957
     * @psalm-pure
3958
     *
3959
     * @return false|int
3960
     *                   <strong>false</strong> if it's not UTF-32,<br>
3961
     *                   <strong>1</strong> for UTF-32LE,<br>
3962
     *                   <strong>2</strong> for UTF-32BE
3963
     */
3964 19
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // A string that carries a BOM skips the "is it binary?" pre-check below.
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score both byte orders with the same round-trip heuristic:
        // decode to UTF-8, re-encode, decode again, and only count when the
        // two decoded results are identical.
        $scores = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $scores[$candidate] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily, at most once for both passes
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys of the counted result are needed, so iterate them
            // by value; the former by-reference loop over a function result
            // had no target to reference and never used the value.
            //
            // NOTE(review): in_array() searches the VALUES of $str_chars -
            // confirm against count_chars() that this is the intended lookup.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$candidate];
                }
            }
        }

        // equal scores (including 0 / 0) are inconclusive
        if ($scores['UTF-32BE'] !== $scores['UTF-32LE']) {
            return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
        }

        return false;
    }
4041
4042
    /**
4043
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
4044
     *
4045
     * EXAMPLE: <code>
4046
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
4047
     * //
4048
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
4049
     * </code>
4050
     *
4051
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
4052
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
4053
     *
4054
     * @psalm-pure
4055
     *
4056
     * @return bool
4057
     */
4058 83
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // Iterate by value: the elements are only read, so a by-reference
            // loop would just leave a dangling reference behind.
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            // every element passed (an empty array passes as well)
            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4072
4073
    /**
4074
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4075
     * Decodes a JSON string
4076
     *
4077
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
4078
     *
4079
     * @see http://php.net/manual/en/function.json-decode.php
4080
     *
4081
     * @param string $json    <p>
4082
     *                        The <i>json</i> string being decoded.
4083
     *                        </p>
4084
     *                        <p>
4085
     *                        This function only works with UTF-8 encoded strings.
4086
     *                        </p>
4087
     *                        <p>PHP implements a superset of
4088
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4089
     *                        only supports these values when they are nested inside an array or an object.
4090
     *                        </p>
4091
     * @param bool   $assoc   [optional] <p>
4092
     *                        When <b>TRUE</b>, returned objects will be converted into
4093
     *                        associative arrays.
4094
     *                        </p>
4095
     * @param int    $depth   [optional] <p>
4096
     *                        User specified recursion depth.
4097
     *                        </p>
4098
     * @param int    $options [optional] <p>
4099
     *                        Bitmask of JSON decode options. Currently only
4100
     *                        <b>JSON_BIGINT_AS_STRING</b>
4101
     *                        is supported (default is to cast large integers as floats)
4102
     *                        </p>
4103
     *
4104
     * @psalm-pure
4105
     *
4106
     * @return mixed
4107
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
4108
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
4109
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
4110
     *               is deeper than the recursion limit.</p>
4111
     */
4112 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // the input is run through self::filter() before the extension check
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // a depth below 1 is raised to 1
        return \json_decode($filtered_json, $assoc, \max(1, $depth), $options);
    }
4130
4131
    /**
4132
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4133
     * Returns the JSON representation of a value.
4134
     *
4135
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
4136
     *
4137
     * @see http://php.net/manual/en/function.json-encode.php
4138
     *
4139
     * @param mixed $value   <p>
4140
     *                       The <i>value</i> being encoded. Can be any type except
4141
     *                       a resource.
4142
     *                       </p>
4143
     *                       <p>
4144
     *                       All string data must be UTF-8 encoded.
4145
     *                       </p>
4146
     *                       <p>PHP implements a superset of
4147
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4148
     *                       only supports these values when they are nested inside an array or an object.
4149
     *                       </p>
4150
     * @param int   $options [optional] <p>
4151
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
4152
     *                       <b>JSON_HEX_TAG</b>,
4153
     *                       <b>JSON_HEX_AMP</b>,
4154
     *                       <b>JSON_HEX_APOS</b>,
4155
     *                       <b>JSON_NUMERIC_CHECK</b>,
4156
     *                       <b>JSON_PRETTY_PRINT</b>,
4157
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
4158
     *                       <b>JSON_FORCE_OBJECT</b>,
4159
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
4160
     *                       constants is described on
4161
     *                       the JSON constants page.
4162
     *                       </p>
4163
     * @param int   $depth   [optional] <p>
4164
     *                       Set the maximum depth. Must be greater than zero.
4165
     *                       </p>
4166
     *
4167
     * @psalm-pure
4168
     *
4169
     * @return false|string
4170
     *                      A JSON encoded <strong>string</strong> on success or<br>
4171
     *                      <strong>FALSE</strong> on failure
4172
     */
4173 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // the value is run through self::filter() before the extension check
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // a depth below 1 is raised to 1
        return \json_encode($filtered_value, $options, \max(1, $depth));
    }
4187
4188
    /**
4189
     * Checks whether JSON is available on the server.
4190
     *
4191
     * @psalm-pure
4192
     *
4193
     * @return bool
4194
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4195
     *
4196
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4197
     */
4198
    public static function json_loaded(): bool
    {
        // JSON support is detected by the presence of the json_decode() function
        $json_is_available = \function_exists('json_decode');

        return $json_is_available;
    }
4202
4203
    /**
4204
     * Makes string's first char lowercase.
4205
     *
4206
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
4207
     *
4208
     * @param string      $str                           <p>The input string</p>
4209
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
4210
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4211
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4212
     *                                                   tr</p>
4213
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4214
     *                                                   -> ß</p>
4215
     *
4216
     * @psalm-pure
4217
     *
4218
     * @return string the resulting string
4219
     */
4220 46
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Any other encoding: normalize its name and use the class helpers throughout.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower($head, $encoding, false, $lang, $try_to_keep_the_string_length) . $tail;
        }

        // UTF-8: split into the first character and the untouched remainder.
        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // mb_strtolower() is only used when neither a language nor
        // length preservation was requested.
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($head) . $tail;
        }

        return self::strtolower($head, $encoding, false, $lang, $try_to_keep_the_string_length) . $tail;
    }
4265
4266
    /**
4267
     * Lowercase for all words in the string.
4268
     *
4269
     * @param string      $str                           <p>The input string.</p>
4270
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
4271
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
4272
     *                                                   not start a new word.</p>
4273
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
4274
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4275
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4276
     *                                                   tr</p>
4277
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4278
     *                                                   -> ß</p>
4279
     *
4280
     * @psalm-pure
4281
     *
4282
     * @return string
4283
     */
4284 4
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare with '' explicitly: a loose check would also throw away the
        // perfectly valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // by value - the parts are only read, never modified in place
        foreach ($words as $word) {
            // skip empty parts only; a part consisting of "0" must be kept
            if ((string) $word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                // listed exceptions are appended unchanged
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4319
4320
    /**
4321
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
4322
     *
4323
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
4324
     *
4325
     * @param string      $str   <p>The string to be trimmed</p>
4326
     * @param string|null $chars <p>Optional characters to be stripped</p>
4327
     *
4328
     * @psalm-pure
4329
     *
4330
     * @return string the string with unwanted characters stripped from the left
4331
     */
4332 23
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would build the invalid pattern "^[]+".
        // Like PHP's native ltrim(), strip nothing in that case.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4359
4360
    /**
4361
     * Returns the UTF-8 character with the maximum code point in the given data.
4362
     *
4363
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
4364
     *
4365
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
4366
     *
4367
     * @psalm-pure
4368
     *
4369
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
4370
     */
4371 2
    public static function max($arg)
    {
        // an array of strings is glued together and treated as one string
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // no code points -> null, otherwise the character for the largest one
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4386
4387
    /**
4388
     * Calculates and returns the maximum number of bytes taken by any
4389
     * UTF-8 encoded character in the given string.
4390
     *
4391
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
4392
     *
4393
     * @param string $str <p>The original Unicode string.</p>
4394
     *
4395
     * @psalm-pure
4396
     *
4397
     * @return int
4398
     *             <p>Max byte lengths of the given chars.</p>
4399
     */
4400 2
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        // an empty size list yields 0
        return $byte_sizes === [] ? 0 : (int) \max($byte_sizes);
    }
4409
4410
    /**
4411
     * Checks whether mbstring is available on the server.
4412
     *
4413
     * @psalm-pure
4414
     *
4415
     * @return bool
4416
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4417
     *
4418
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4419
     */
4420 28
    public static function mbstring_loaded(): bool
    {
        // mbstring support is detected via the loaded-extension list
        $mbstring_is_available = \extension_loaded('mbstring');

        return $mbstring_is_available;
    }
4424
4425
    /**
4426
     * Returns the UTF-8 character with the minimum code point in the given data.
4427
     *
4428
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
4429
     *
4430
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
4431
     *
4432
     * @psalm-pure
4433
     *
4434
     * @return string|null
4435
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
4436
     */
4437 2
    public static function min($arg)
    {
        // an array of strings is glued together and treated as one string
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // no code points -> null, otherwise the character for the smallest one
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4452
4453
    /**
4454
     * Normalize the encoding-"name" input.
4455
     *
4456
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4457
     *
4458
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4459
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4460
     *
4461
     * @psalm-pure
4462
     *
4463
     * @return mixed|string
4464
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
4465
     *
4466
     * @template TNormalizeEncodingFallback
4467
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
4468
     * @phpstan-return string|TNormalizeEncodingFallback
4469
     */
4470 339
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // falsy input ('' and '0') -> fallback
        if (!$encoding) {
            return $fallback;
        }

        //
        // fast paths for the most common spellings (strict comparison only)
        //

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
            return 'ISO-8859-1';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        // already resolved during this request?
        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // the list of known encoding names is loaded on first use
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // a name that is in the list exactly as given is returned as-is
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // Otherwise: upper-case the name, strip everything except letters and
        // digits, and look the result up in the alias table below.
        $cache_key = $encoding;
        $encoding = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $aliases = [
            // ISO-8859-x / "LATINx"
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            // Windows code pages
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            // Unicode transformation formats
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            // "binary"
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown aliases keep the upper-cased name
        $encoding = $aliases[$alias_key] ?? $encoding;

        // the result is cached under the spelling the caller originally used
        $STATIC_NORMALIZE_ENCODING_CACHE[$cache_key] = $encoding;

        return $encoding;
    }
4618
4619
    /**
4620
     * Standardize line ending to unix-like.
4621
     *
4622
     * @param string          $str      <p>The input string.</p>
4623
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
4624
     *                                  here.</p>
4625
     *
4626
     * @psalm-pure
4627
     *
4628
     * @return string
4629
     *                <p>A string with normalized line ending.</p>
4630
     */
4631 4
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // An array of replacers keeps the original str_replace() behaviour
        // (the n-th replacer is applied to the n-th of "\r\n", "\r", "\n").
        if (\is_array($replacer)) {
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // strtr() with an array works in ONE pass, tries the longest key first
        // and never re-scans text it has already replaced. A sequential
        // str_replace() would replace the "\r" and "\n" it has just inserted
        // again, e.g. turning "\r\n" into "\r\r\n\r\n" for the replacer "\r\n".
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4635
4636
    /**
4637
     * Normalize some MS Word special characters.
4638
     *
4639
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
4640
     *
4641
     * @param string $str <p>The string to be normalized.</p>
4642
     *
4643
     * @psalm-pure
4644
     *
4645
     * @return string
4646
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
4647
     */
4648 10
    public static function normalize_msword(string $str): string
    {
        // pure delegation to the ASCII helper class
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4652
4653
    /**
4654
     * Normalize the whitespace.
4655
     *
4656
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
4657
     *
4658
     * @param string $str                          <p>The string to be normalized.</p>
4659
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4660
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
4661
     *                                             bidirectional text chars.</p>
4662
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
4663
     *
4664
     * @psalm-pure
4665
     *
4666
     * @return string
4667
     *                <p>A string with normalized whitespace.</p>
4668
     */
4669 61
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // pure delegation to the ASCII helper class, all flags are passed through unchanged
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls, $normalize_control_characters);

        return $normalized;
    }
4682
4683
    /**
4684
     * Calculates Unicode code point of the given UTF-8 encoded character.
4685
     *
4686
     * INFO: opposite to UTF8::chr()
4687
     *
4688
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
4689
     *
4690
     * @param string $chr      <p>The character of which to calculate code point.</p>
4691
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
4692
     *
4693
     * @psalm-pure
4694
     *
4695
     * @return int
4696
     *             <p>Unicode code point of the given character,<br>
4697
     *             0 on invalid UTF-8 byte sequence</p>
4698
     */
4699 27
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // results are memoized per "character + encoding"
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // the lookup table is loaded on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $code = \IntlChar::ord($chr);
            // IntlChar::ord() returns null on failure. Test for an int
            // explicitly instead of relying on truthiness, which treats a
            // valid code point 0 like a failure.
            if (\is_int($code)) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php
        //

        // at most the first four bytes, as a 1-based list of byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @var int[] $bytes - "unpack": only false if the format string contains errors */
        $code = $bytes ? $bytes[1] : 0;

        // lead byte >= 0xF0 and four bytes available
        if ($code >= 0xF0 && isset($bytes[4])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // lead byte >= 0xE0 and three bytes available
        if ($code >= 0xE0 && isset($bytes[3])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // lead byte >= 0xC0 and two bytes available
        if ($code >= 0xC0 && isset($bytes[2])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // otherwise the first byte value itself (0 for an empty string)
        return $CHAR_CACHE[$cache_key] = $code;
    }
4768
4769
    /**
     * Parses a query string into an array (written into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method never creates variables in the current scope;
     *          the result is only available through the reference parameter.
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // without "mbstring" we fall back to the native parser
        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        // success requires both: no parser failure and a non-empty result
        return $parsed !== false && $result !== [];
    }
4810
4811
    /**
     * Checks if the "u" pattern modifier (Unicode support in PCRE) is available.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // probe with an empty pattern; warnings are silenced because only the result matters
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return (bool) $probe;
    }
4827
4828
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but "ext-ctype" is not available
     *
     * @return string[]
     *                  <p>An empty array if one of the boundaries is falsy or resolves to code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions must always receive strings: an integer argument would be
        // interpreted as an ASCII code (and is deprecated since PHP 8.1).
        $var1_as_string = (string) $var1;
        $var2_as_string = (string) $var2;

        if ($use_ctype && \ctype_digit($var1_as_string) && \ctype_digit($var2_as_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit($var1_as_string) && \ctype_xdigit($var2_as_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_as_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // not a number: treat the value as a character and use its code point
            $start = self::ord($var1_as_string);
        }

        if (!$start) {
            return [];
        }

        // the end boundary is interpreted the same way as the start boundary
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_as_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_as_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4921
4922
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass always runs; with $multi_decode the pass is repeated
        // until the string no longer changes.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
4982
4983
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always compiled with the "u" (UTF-8) modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is mapped to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback to the default delimiter for an empty (falsy) value
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5018
5019
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked in order against the start of the
     * (already shortened) string, so several leading BOMs can be stripped in one call.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bom => $bom_size) {
            // compare only the first $bom_size bytes
            if (\strncmp($str, $bom, $bom_size) !== 0) {
                continue;
            }

            /** @var false|string $remainder - needed for PhpStan (stubs error) */
            $remainder = \substr($str, $bom_size);
            if ($remainder === false) {
                return '';
            }

            $str = (string) $remainder;
        }

        return $str;
    }
5054
5055
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                // A callback inserts $item literally. Passing it as a plain replacement
                // string would expand sequences such as "$0" or "\1" as backreferences.
                $str = (string) \preg_replace_callback(
                    '/(' . \preg_quote($item, '/') . ')+/u',
                    static function () use ($item): string {
                        return $item;
                    },
                    $str
                );
            }
        }

        return $str;
    }
5085
5086
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                               Default: '' (strip every tag)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $without_tags = \strip_tags($str, $allowable_tags);

        return $without_tags;
    }
5103
5104
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" must come first so the pair is consumed as one break; modifiers:
        // i = case-insensitive ("<BR>"), s = dot matches newlines, U = ungreedy
        // (so "<br ...>" stops at the first closing ">").
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
5119
5120
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * This method only forwards its arguments to "ASCII::remove_invisible_characters()".
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // pure delegation, no additional processing happens here
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement, $keep_basic_control_characters);

        return $cleaned;
    }
5157
5158
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove. An empty prefix leaves $str unchanged.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would also reject the
        // perfectly valid prefix "0".
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        // fast path: no encoding normalization needed
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5199
5200
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove. An empty suffix leaves $str unchanged.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would also reject the
        // perfectly valid suffix "0".
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        // fast path: no encoding normalization needed
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5238
5239
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // case-sensitive: native byte-wise replace; otherwise the class' own "str_ireplace()"
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5264
5265
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // case-insensitive matching is handled by the class' own "str_ireplace()"
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5290
5291
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // Invalid bytes are dropped ("none") for an empty replacement. Otherwise they are
            // first turned into U+FFFD, which the final "str_replace" below maps to the real
            // replacement. Unlike a byte-wise "ord()" of the replacement, this also works
            // for multi-byte replacement characters.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character($substitute);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is global state: always restore it
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            $replacement_char,
            $str
        );
    }
5350
5351
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            $pattern = '[\\s]+$';
        } else {
            // "mb_ereg_replace" takes a pattern without delimiters, while the PCRE
            // fallback wraps the pattern in "/" and therefore needs it escaped as well
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = "[{$quoted_chars}]+$";
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5391
5392
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the support table.
     *
     * @param bool $useEcho <p>Also echo the generated HTML. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated HTML (returned regardless of $useEcho).</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        // init
        $html = '<pre>';

        // read-only iteration: no reference needed (and no dangling reference left behind)
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5418
5419
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // the ASCII check only runs when ASCII chars should be kept
        $return_unchanged = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($return_unchanged) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5452
5453
    /**
     * Replaces every run of $tab_length consecutive spaces in the string with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // the space-run that represents a single tab
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5473
5474
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character of the trimmed input, then drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper" is only used when no language / length handling was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-cases one matched fragment, either via "mb_strtoupper" or via the class' own "strtoupper()".
         *
         * @param string $part
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $part, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($part, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($part);
            }

            return \mb_strtoupper($part, $encoding);
        };

        // step 1: remove separator runs and upper-case the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                if (!isset($match[1])) {
                    // trailing separators: nothing follows, just drop them
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        // step 2: upper-case the whole match of "digits + following character"
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5567
5568
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, afterwards the capitalization helper
     * runs twice: once with ' ' and once with '-' as second argument
     * (presumably the word delimiter — see str_capitalize_name_helper()).
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_pass = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_pass, '-');
    }
5589
5590
    /**
     * Checks whether $haystack contains $needle.
     *
     * The case-sensitive check uses the native "str_contains()" on PHP >= 8
     * and "strpos()" on older versions; the case-insensitive check uses
     * "mb_stripos()".
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5620
5621
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or any empty needle
     * (e.g. '' or null) makes the result false. The string '0' is a valid
     * needle.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains every one of the $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Empty needles can never be "contained"; '0' is falsy in PHP but
            // is a perfectly valid substring, so it must not be rejected here.
            if (!$needle && $needle !== '0') {
                return false;
            }

            // Only one search per needle: a case-sensitive hit does not need
            // an additional case-insensitive scan.
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5658
5659
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles (e.g. '' or null) are skipped. The string '0' is a valid
     * needle.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Skip empty needles, but keep '0': it is falsy in PHP and would
            // otherwise be ignored although it is a valid substring.
            if (!$needle && $needle !== '0') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5702
5703
    /**
     * Returns a lowercase and trimmed string separated by dashes.
     *
     * This is a shortcut for str_delimit() with '-' as delimiter: dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5719
5720
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     *
     * The work is done in three steps:
     * 1. a '-' is inserted before every uppercase letter that is not at a word boundary,
     * 2. the whole string is lower-cased,
     * 3. every run of dashes, underscores and whitespace is replaced by $delimiter.
     *
     * Because the delimiter is inserted after the lower-casing, alpha
     * delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // The multibyte regex engine is used when "mbstring" is available,
        // otherwise PCRE in unicode mode does the same job.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) mark the inner uppercase letters
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lower-case (fast path only for plain UTF-8 without special language rules)
        $plain_utf8 = $lang === null && !$try_to_keep_the_string_length && $encoding === 'UTF-8';
        if ($plain_utf8) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // 3.) replace the separator runs with the requested delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5771
5772
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order and the first hit wins: binary data
     * (UTF-32 / UTF-16 or "false"), ASCII, UTF-8, "mb_detect_encoding()" with
     * a fixed candidate list, and finally an "iconv()" round-trip over the
     * encodings returned by getData('encodings').
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        $has_no_bom = !self::string_has_bom($str);
        if (self::is_binary($str, $has_no_bom)) {
            $utf32_type = self::is_utf32($str, false);
            if ($utf32_type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_type === 2) {
                return 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($str, false);
            if ($utf16_type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_type === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        // The encoding list is loaded once and then kept in the static property.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5902
5903
    /**
     * Check if the string ends with the given substring (case-sensitive).
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // Compare the last strlen($needle) bytes with the needle.
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5935
5936
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring matches every string (same rule as str_ends_with()
     *   and str_iends_with_any())
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $substring) {
            // "substr($str, -0)" would return the whole string instead of an
            // empty tail, so the empty substring is handled explicitly.
            if ($substring === '') {
                return true;
            }

            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5963
5964
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * The check is a case-sensitive "strpos() === 0" comparison.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $has_prefix = $substring !== '' && \strpos($str, $substring) === 0;

        return $has_prefix ? $str : $substring . $str;
    }
5987
5988
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * The check compares the last strlen($substring) bytes of $str with
     * $substring (case-sensitive).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $has_suffix = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        return $has_suffix ? $str : $str . $substring;
    }
6012
6013
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * '_id' is removed before the remaining underscores are turned into
     * spaces; the result is trimmed and passed to ucfirst().
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
6039
6040
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle. Otherwise the last strlen($needle) bytes of $haystack
     * are compared with $needle via strcasecmp().
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6067
6068
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - every candidate is checked via str_iends_with(); an empty list
     *   yields false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        // also reached for an empty $substrings list
        return false;
    }
6095
6096
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of $str, the string is returned
     * unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8: use the native "mb_*" functions directly
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        /** @noinspection UnnecessaryCastingInspection */
        $head = (string) \mb_substr($str, 0, $index);
        /** @noinspection UnnecessaryCastingInspection */
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
6137
6138
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search string is converted into a quoted, case-insensitive unicode
     * regex and the replacement is done via "preg_replace()". The replacement
     * is always inserted literally: "$" and "\" are escaped, so sequences like
     * "$1" or "\0" are not interpreted as regex back-references.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     An empty search string never matches.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Build one regex per search string (keys and order are kept).
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // a pattern that can never match -> the empty search is a no-op
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // "preg_replace()" would treat "$n" / "\n" in the replacement as
        // back-references, "str_replace()" semantics require literal text.
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $literal_replacement = \is_array($replacement)
            ? \array_map($escape, $replacement)
            : $escape($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $literal_replacement, $subject, -1, $count);

        return $subject;
    }
6205
6206
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - the comparison is case-insensitive and byte-based ("strncasecmp()")
     * - an empty $search results in $replacement being appended to $str
     * - if $str does not start with $search, $str is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Covers the empty $str cases as well: '' . $replacement === $replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_length = \strlen($search);
        $starts_with_search = \strncasecmp($str, $search, $prefix_length) === 0;

        return $starts_with_search
            ? $replacement . \substr($str, $prefix_length)
            : $str;
    }
6241
6242
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - the comparison is case-insensitive and byte-based ("stripos()")
     * - an empty $search results in $replacement being appended to $str
     * - if $str does not end with $search (or is shorter than $search),
     *   $str is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Covers the empty $str cases as well: '' . $replacement === $replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        // Start of the only position where $search could be a suffix.
        $offset = \strlen($str) - \strlen($search);

        // A longer $search can never match; without this guard "stripos()"
        // receives an out-of-range negative offset (ValueError on PHP 8,
        // warning on PHP 7).
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) === false) {
            return $str;
        }

        return \substr($str, 0, $offset) . $replacement;
    }
6276
6277
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle. Otherwise $needle has to be found at position 0 by
     * stripos().
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6304
6305
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty $substrings list yields false
     * - every candidate is checked via str_istarts_with()
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6336
6337
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * The separator is located case-insensitively via stripos(). An empty
     * string is returned if $str or $separator is empty or if the separator
     * is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::stripos($str, $separator);
        if ($position === false) {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $start = $position + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $start, null, $encoding);
        }

        // UTF-8: skip the separator and return the rest of the string
        $start = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
6376
6377
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * The separator is located case-insensitively via strripos(). An empty
     * string is returned if $str or $separator is empty or if the separator
     * is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::strripos($str, $separator);
        if ($position === false) {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $start = $position + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $start, null, $encoding);
        }

        // UTF-8: skip the separator and return the rest of the string
        $start = $position + (int) self::strlen($separator);

        return (string) \mb_substr($str, $start);
    }
6416
6417
    /**
6418
     * Gets the substring before the first occurrence of a separator.
6419
     *
6420
     * @param string $str       <p>The input string.</p>
6421
     * @param string $separator <p>The string separator.</p>
6422
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6423
     *
6424
     * @psalm-pure
6425
     *
6426
     * @return string
6427
     */
6428 1
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // An empty haystack or an empty separator can never produce a match.
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used below to cut the string,
        // so the character offset and the substring call agree with each other.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Everything in front of the match is the result.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6448
6449
    /**
6450
     * Gets the substring before the last occurrence of a separator.
6451
     *
6452
     * @param string $str       <p>The input string.</p>
6453
     * @param string $separator <p>The string separator.</p>
6454
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6455
     *
6456
     * @psalm-pure
6457
     *
6458
     * @return string
6459
     */
6460 1
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can be found in (or with) an empty string.
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Case-insensitive search for the last occurrence of the separator.
        $position = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // Keep only the part in front of the last match.
        return $is_utf8
            ? (string) \mb_substr($str, 0, $position)
            : (string) self::substr($str, 0, $position, $encoding);
    }
6485
6486
    /**
6487
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
6488
     *
6489
     * @param string $str           <p>The input string.</p>
6490
     * @param string $needle        <p>The string to look for.</p>
6491
     * @param bool   $before_needle [optional] <p>Default: false</p>
6492
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6493
     *
6494
     * @psalm-pure
6495
     *
6496
     * @return string
6497
     */
6498 2
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Guard clause: empty input or empty needle yields an empty result.
        if ($str === '' || $needle === '') {
            return '';
        }

        $result = self::stristr($str, $needle, $before_needle, $encoding);

        // stristr() signals "not found" with false; this method reports it as ''.
        return $result === false ? '' : $result;
    }
6524
6525
    /**
6526
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
6527
     *
6528
     * @param string $str           <p>The input string.</p>
6529
     * @param string $needle        <p>The string to look for.</p>
6530
     * @param bool   $before_needle [optional] <p>Default: false</p>
6531
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6532
     *
6533
     * @psalm-pure
6534
     *
6535
     * @return string
6536
     */
6537 1
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Guard clause: empty input or empty needle yields an empty result.
        if ($str === '' || $needle === '') {
            return '';
        }

        $result = self::strrichr($str, $needle, $before_needle, $encoding);

        // strrichr() signals "not found" with false; this method reports it as ''.
        return $result === false ? '' : $result;
    }
6563
6564
    /**
6565
     * Returns the last $n characters of the string.
6566
     *
6567
     * @param string $str      <p>The input string.</p>
6568
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6569
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6570
     *
6571
     * @psalm-pure
6572
     *
6573
     * @return string
6574
     */
6575 12
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // A non-positive count or an empty input has no trailing characters.
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        // A negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
6592
6593
    /**
6594
     * Limit the number of characters in a string.
6595
     *
6596
     * @param string $str        <p>The input string.</p>
6597
     * @param int    $length     [optional] <p>Default: 100</p>
6598
     * @param string $str_add_on [optional] <p>Default: …</p>
6599
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6600
     *
6601
     * @psalm-pure
6602
     *
6603
     * @return string
6604
     */
6605 2
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: return the input untouched (no add-on).
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on. The value is clamped at 0 because a
            // negative length would make mb_substr() cut from the END of the
            // string and return (almost) the whole input instead of limiting it.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string it is appended to,
        // and clamp for the same reason as in the UTF-8 branch above.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6632
6633
    /**
6634
     * Limit the number of characters in a string, but also after the next word.
6635
     *
6636
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
6637
     *
6638
     * @param string $str        <p>The input string.</p>
6639
     * @param int    $length     [optional] <p>Default: 100</p>
6640
     * @param string $str_add_on [optional] <p>Default: …</p>
6641
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6642
     *
6643
     * @psalm-pure
6644
     *
6645
     * @return string
6646
     */
6647 6
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: nothing to cut, no add-on.
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls exactly on a space: cut right in front of it.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $head = \mb_substr($str, 0, $length);

            // Drop the last (possibly incomplete) space-separated word.
            $whole_words = \implode(' ', \explode(' ', $head, -1));

            // No space inside the limit: fall back to a hard cut.
            if ($whole_words === '') {
                return ((string) \mb_substr($head, 0, $length - 1)) . $str_add_on;
            }

            return $whole_words . $str_add_on;
        }

        // Short enough already: nothing to cut, no add-on.
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right in front of it.
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            // Nothing could be extracted, so only the add-on remains.
            return $str_add_on;
        }

        // Drop the last (possibly incomplete) space-separated word.
        $whole_words = \implode(' ', \explode(' ', $head, -1));

        // No space inside the limit: fall back to a hard cut.
        if ($whole_words === '') {
            return ((string) self::substr($head, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
6699
6700
    /**
6701
     * Returns the longest common prefix between the $str1 and $str2.
6702
     *
6703
     * @param string $str1     <p>The input string.</p>
6704
     * @param string $str2     <p>Second string for comparison.</p>
6705
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6706
     *
6707
     * @psalm-pure
6708
     *
6709
     * @return string
6710
     */
6711 10
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // The prefix can never be longer than the shorter of both strings.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            $position = 0;
            while ($position < $limit) {
                $current = \mb_substr($str1, $position, 1);

                // Stop at the first position where the strings differ.
                if ($current === false || $current !== \mb_substr($str2, $position, 1)) {
                    break;
                }

                $prefix .= $current;
                ++$position;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The prefix can never be longer than the shorter of both strings.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $position = 0;
        while ($position < $limit) {
            $current = self::substr($str1, $position, 1, $encoding);

            // Stop at the first position where the strings differ.
            if ($current === false || $current !== self::substr($str2, $position, 1, $encoding)) {
                break;
            }

            $prefix .= $current;
            ++$position;
        }

        return $prefix;
    }
6763
6764
    /**
6765
     * Returns the longest common substring between the $str1 and $str2.
6766
     * In the case of ties, it returns that which occurs first.
6767
     *
6768
     * @param string $str1
6769
     * @param string $str2     <p>Second string for comparison.</p>
6770
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6771
     *
6772
     * @psalm-pure
6773
     *
6774
     * @return string
6775
     *                <p>The longest common substring of $str1 and $str2.</p>
6776
     */
6777 11
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Decided AFTER normalization: from here on the (normalized) encoding
        // name selects between the native mb_* calls and the class helpers.
        $is_utf8 = $encoding === 'UTF-8';

        // Extract the characters of $str2 only once (1-based, like the table)
        // instead of calling substr again for every cell of the table.
        $other_chars = [];
        for ($j = 1; $j <= $other_length; ++$j) {
            $other_chars[$j] = $is_utf8
                ? \mb_substr($str2, $j - 1, 1)
                : self::substr($str2, $j - 1, 1, $encoding);
        }

        // $len = length of the best match so far, $end = 1-based end of it in $str1.
        $len = 0;
        $end = 0;

        // $table[$i][$j] = length of the common run ending at $str1[$i] / $str2[$j].
        // Cells are pre-filled with 0, so non-matching cells need no assignment.
        $table = \array_fill(
            0,
            $str_length + 1,
            \array_fill(0, $other_length + 1, 0)
        );

        for ($i = 1; $i <= $str_length; ++$i) {
            // The character of $str1 only depends on $i, so fetch it once per row.
            $str_char = $is_utf8
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char !== $other_chars[$j]) {
                    continue;
                }

                $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

                // Strictly greater: on ties the first occurrence wins.
                if ($table[$i][$j] > $len) {
                    $len = $table[$i][$j];
                    $end = $i;
                }
            }
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6854
6855
    /**
6856
     * Returns the longest common suffix between the $str1 and $str2.
6857
     *
6858
     * @param string $str1
6859
     * @param string $str2     <p>Second string for comparison.</p>
6860
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6861
     *
6862
     * @psalm-pure
6863
     *
6864
     * @return string
6865
     */
6866 10
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // The suffix can never be longer than the shorter of both strings.
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the end: negative offsets count from the last character.
            for ($back = 1; $back <= $limit; ++$back) {
                $current = \mb_substr($str1, -$back, 1);

                // Stop at the first position where the strings differ.
                if ($current === false || $current !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The suffix can never be longer than the shorter of both strings.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Walk backwards from the end: negative offsets count from the last character.
        for ($back = 1; $back <= $limit; ++$back) {
            $current = self::substr($str1, -$back, 1, $encoding);

            // Stop at the first position where the strings differ.
            if ($current === false || $current !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }
6921
6922
    /**
6923
     * Returns true if $str matches the supplied pattern, false otherwise.
6924
     *
6925
     * @param string $str     <p>The input string.</p>
6926
     * @param string $pattern <p>Regex pattern to match against.</p>
6927
     *
6928
     * @psalm-pure
6929
     *
6930
     * @return bool
6931
     *              <p>Whether or not $str matches the pattern.</p>
6932
     */
6933 10
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The raw pattern is wrapped in "/" delimiters and run in unicode ("u") mode.
        $regex = '/' . $pattern . '/u';

        return (bool) \preg_match($regex, $str);
    }
6937
6938
    /**
6939
     * Returns whether or not a character exists at an index. Offsets may be
6940
     * negative to count from the last character in the string. Implements
6941
     * part of the ArrayAccess interface.
6942
     *
6943
     * @param string $str      <p>The input string.</p>
6944
     * @param int    $offset   <p>The index to check.</p>
6945
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6946
     *
6947
     * @psalm-pure
6948
     *
6949
     * @return bool
6950
     *              <p>Whether or not the index exists.</p>
6951
     */
6952 6
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Negative offsets count from the end, so -1 needs at least one character;
        // non-negative offsets are zero-based and must be below the length.
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
6963
6964
    /**
6965
     * Returns the character at the given index. Offsets may be negative to
6966
     * count from the last character in the string. Implements part of the
6967
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
6968
     * does not exist.
6969
     *
6970
     * @param string $str      <p>The input string.</p>
6971
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
6972
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6973
     *
6974
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
6975
     *
6976
     * @return string
6977
     *                <p>The character at the specified index.</p>
6978
     *
6979
     * @psalm-pure
6980
     */
6981 2
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string with the same encoding that is handed to char_at()
        // below, so the bounds check and the lookup use the same character count.
        $length = (int) self::strlen($str, $encoding);

        if (
            // zero-based positive index at or behind the end
            ($index >= 0 && $length <= $index)
            ||
            // negative index reaching in front of the first character
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6996
6997
    /**
6998
     * Pad a UTF-8 string to a given length with another string.
6999
     *
7000
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
7001
     *
7002
     * @param string     $str        <p>The input string.</p>
7003
     * @param int        $pad_length <p>The length of return string.</p>
7004
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
7005
     * @param int|string $pad_type   [optional] <p>
7006
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
7007
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
7008
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
7009
     *                               </p>
7010
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
7011
     *
7012
     * @psalm-pure
7013
     *
7014
     * @return string
7015
     *                <p>Returns the padded string.</p>
7016
     */
7017 41
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        // Without a target length or padding material the input is returned untouched.
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // Anything that is not already an integer must be one of the textual aliases.
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        if ($encoding === 'UTF-8') {
            $current_length = (int) \mb_strlen($str);

            // The string already is at least as long as requested.
            if ($pad_length < $current_length) {
                return $str;
            }

            $missing = $pad_length - $current_length;
            $left = '';
            $right = '';

            if ($pad_type === \STR_PAD_LEFT) {
                // Repeat the pad string often enough, then trim to the exact size.
                $repeats = (int) \ceil($missing / (int) \mb_strlen($pad_string));
                $left = (string) \mb_substr(\str_repeat($pad_string, $repeats), 0, $missing);
            } elseif ($pad_type === \STR_PAD_BOTH) {
                // With an odd number of missing characters the right side gets the extra one.
                $left_size = (int) \floor($missing / 2);
                $right_size = (int) \ceil($missing / 2);

                $left = (string) \mb_substr(\str_repeat($pad_string, $left_size), 0, $left_size);
                $right = (string) \mb_substr(\str_repeat($pad_string, $right_size), 0, $right_size);
            } else {
                // STR_PAD_RIGHT and every unknown integer value pad on the right.
                $repeats = (int) \ceil($missing / (int) \mb_strlen($pad_string));
                $right = (string) \mb_substr(\str_repeat($pad_string, $repeats), 0, $missing);
            }

            return $left . $str . $right;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $current_length = (int) self::strlen($str, $encoding);

        // The string already is at least as long as requested.
        if ($pad_length < $current_length) {
            return $str;
        }

        $missing = $pad_length - $current_length;
        $left = '';
        $right = '';

        if ($pad_type === \STR_PAD_LEFT) {
            // Repeat the pad string often enough, then trim to the exact size.
            $repeats = (int) \ceil($missing / (int) self::strlen($pad_string, $encoding));
            $left = (string) self::substr(\str_repeat($pad_string, $repeats), 0, $missing, $encoding);
        } elseif ($pad_type === \STR_PAD_BOTH) {
            // With an odd number of missing characters the right side gets the extra one.
            $left_size = (int) \floor($missing / 2);
            $right_size = (int) \ceil($missing / 2);

            $left = (string) self::substr(\str_repeat($pad_string, $left_size), 0, $left_size, $encoding);
            $right = (string) self::substr(\str_repeat($pad_string, $right_size), 0, $right_size, $encoding);
        } else {
            // STR_PAD_RIGHT and every unknown integer value pad on the right.
            $repeats = (int) \ceil($missing / (int) self::strlen($pad_string, $encoding));
            $right = (string) self::substr(\str_repeat($pad_string, $repeats), 0, $missing, $encoding);
        }

        return $left . $str . $right;
    }
7164
7165
    /**
7166
     * Returns a new string of a given length such that both sides of the
7167
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
7168
     *
7169
     * @param string $str
7170
     * @param int    $length   <p>Desired string length after padding.</p>
7171
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7172
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7173
     *
7174
     * @psalm-pure
7175
     *
7176
     * @return string
7177
     *                <p>The string with padding applied.</p>
7178
     */
7179 11
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin convenience wrapper: delegate to str_pad() with a fixed pad type.
        $pad_type = \STR_PAD_BOTH;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
7193
7194
    /**
7195
     * Returns a new string of a given length such that the beginning of the
7196
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
7197
     *
7198
     * @param string $str
7199
     * @param int    $length   <p>Desired string length after padding.</p>
7200
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7201
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7202
     *
7203
     * @psalm-pure
7204
     *
7205
     * @return string
7206
     *                <p>The string with left padding.</p>
7207
     */
7208 7
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin convenience wrapper: delegate to str_pad() with a fixed pad type.
        $pad_type = \STR_PAD_LEFT;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
7222
7223
    /**
7224
     * Returns a new string of a given length such that the end of the string
7225
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
7226
     *
7227
     * @param string $str
7228
     * @param int    $length   <p>Desired string length after padding.</p>
7229
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7230
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7231
     *
7232
     * @psalm-pure
7233
     *
7234
     * @return string
7235
     *                <p>The string with right padding.</p>
7236
     */
7237 7
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin convenience wrapper: delegate to str_pad() with a fixed pad type.
        $pad_type = \STR_PAD_RIGHT;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
7251
7252
    /**
7253
     * Repeat a string.
7254
     *
7255
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
7256
     *
7257
     * @param string $str        <p>
7258
     *                           The string to be repeated.
7259
     *                           </p>
7260
     * @param int    $multiplier <p>
7261
     *                           Number of times the input string should be
7262
     *                           repeated.
7263
     *                           </p>
7264
     *                           <p>
7265
     *                           multiplier has to be greater than or equal to 0.
7266
     *                           If the multiplier is set to 0, the function
7267
     *                           will return an empty string.
7268
     *                           </p>
7269
     *
7270
     * @psalm-pure
7271
     *
7272
     * @return string
7273
     *                <p>The repeated string.</p>
7274
     */
7275 9
    public static function str_repeat(string $str, int $multiplier): string
    {
        // The input is passed through self::filter() first; the filtered result is what gets repeated.
        return \str_repeat(self::filter($str), $multiplier);
    }
7281
7282
    /**
7283
     * INFO: This is only a wrapper for "str_replace()"  -> the original function is already UTF-8 safe.
7284
     *
7285
     * Replace all occurrences of the search string with the replacement string
7286
     *
7287
     * @see http://php.net/manual/en/function.str-replace.php
7288
     *
7289
     * @param string|string[] $search  <p>
7290
     *                                 The value being searched for, otherwise known as the needle.
7291
     *                                 An array may be used to designate multiple needles.
7292
     *                                 </p>
7293
     * @param string|string[] $replace <p>
7294
     *                                 The replacement value that replaces found search
7295
     *                                 values. An array may be used to designate multiple replacements.
7296
     *                                 </p>
7297
     * @param string|string[] $subject <p>
7298
     *                                 The string or array of strings being searched and replaced on,
7299
     *                                 otherwise known as the haystack.
7300
     *                                 </p>
7301
     *                                 <p>
7302
     *                                 If subject is an array, then the search and
7303
     *                                 replace is performed with every entry of
7304
     *                                 subject, and the return value is an array as
7305
     *                                 well.
7306
     *                                 </p>
7307
     * @param int|null        $count   [optional] <p>
7308
     *                                 If passed, this will hold the number of matched and replaced needles.
7309
     *                                 </p>
7310
     *
7311
     * @psalm-pure
7312
     *
7313
     * @return string|string[]
7314
     *                         <p>This function returns a string or an array with the replaced values.</p>
7315
     *
7316
     * @template TStrReplaceSubject
7317
     * @phpstan-param TStrReplaceSubject $subject
7318
     * @phpstan-return TStrReplaceSubject
7319
     *
7320
     * @deprecated please use \str_replace() instead
7321
     */
7322 12
    public static function str_replace(
7323
        $search,
7324
        $replace,
7325
        $subject,
7326
        int &$count = null
7327
    ) {
7328
        /**
7329
         * @psalm-suppress PossiblyNullArgument
7330
         * @phpstan-var TStrReplaceSubject $return;
7331
         */
7332 12
        $return = \str_replace(
7333 12
            $search,
7334 12
            $replace,
7335 12
            $subject,
7336 12
            $count
7337
        );
7338
7339 12
        return $return;
7340
    }
7341
7342
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * An empty $search never matches a prefix; in that case the replacement
     * is appended to the input string.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // Empty needle: the result is always the input followed by the replacement
        // (which also yields '' or just the replacement for an empty input).
        if ($search === '') {
            return $str . $replacement;
        }

        // A non-empty needle can never be the prefix of an empty string.
        if ($str === '') {
            return '';
        }

        $prefix_length = \strlen($search);
        $has_prefix = \strncmp($str, $search, $prefix_length) === 0;

        return $has_prefix
            ? $replacement . \substr($str, $prefix_length)
            : $str;
    }
7380
7381
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * An empty $search never matches a suffix; in that case the replacement
     * is appended to the input string. A $search that is longer than $str
     * cannot match and leaves the input untouched.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // Empty needle: the result is always the input followed by the replacement
        // (which also yields '' or just the replacement for an empty input).
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // Guard against needles longer than the haystack (this includes an empty
        // haystack): an out-of-range negative offset would make "strpos()" throw
        // a ValueError on PHP 8 and raise a warning on PHP 7.
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // Byte-wise suffix comparison, the only position where the needle may match.
        if (\substr($str, -$search_length) === $search) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
7418
7419
    /**
     * Replace the first occurrence of "$search" in "$subject" with "$replace".
     *
     * If "$search" is not found, the subject is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);
        if ($first_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, (int) self::strlen($search));
    }
7453
7454
    /**
     * Replace the last occurrence of "$search" in "$subject" with "$replace".
     *
     * If "$search" is not found, the subject is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);
        if ($last_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, (int) self::strlen($search));
    }
7487
7488
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses "shuffle()", a random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Nothing to shuffle; also avoids building the bogus index list
        // "range(0, -1)" === [0, -1] for an empty input.
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $indexes = \range(0, (int) \mb_strlen($str) - 1);
            \shuffle($indexes);

            $shuffled_str = '';
            foreach ($indexes as $index) {
                $tmp_sub_str = \mb_substr($str, $index, 1);
                if ($tmp_sub_str !== false) {
                    $shuffled_str .= $tmp_sub_str;
                }
            }

            return $shuffled_str;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $indexes = \range(0, (int) self::strlen($str, $encoding) - 1);
        \shuffle($indexes);

        $shuffled_str = '';
        foreach ($indexes as $index) {
            $tmp_sub_str = self::substr($str, $index, 1, $encoding);
            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
7535
7536
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string. If an $end is given, a negative $start is computed from
     * the end of the string as well, and an empty string is returned whenever
     * the resolved $end does not lie behind the resolved $start.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> may be returned by the underlying substr call.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Fast path: a non-negative end at or before the start is always empty.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // No end given: hand the start over unchanged and take everything after it.
            $offset = $start;
            $length = $str_length;
        } else {
            // Resolve both indexes to absolute positions first, so a negative
            // length can never reach the substr call (where it would be
            // reinterpreted as "omit N characters from the end").
            $offset = $start < 0 ? \max(0, $str_length + $start) : $start;
            $end_index = $end < 0 ? $str_length + $end : $end;

            if ($end_index <= $offset) {
                return '';
            }

            $length = $end_index - $offset;
        }

        if ($is_utf8) {
            return \mb_substr($str, $offset, $length);
        }

        return self::substr($str, $offset, $length, $encoding);
    }
7587
7588
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become underscores, every uppercase letter is
     * lowercased and prefixed with an underscore, ASCII digits are wrapped
     * in underscores, and runs of underscores are collapsed afterwards.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Numbers and uppercase letters only; a "|" inside a character class
            // would be a literal pipe, not an alternation.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // a plain ASCII digit survives the int round-trip unchanged
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7657
7658
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops every repeated code point
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7689
7690
    /**
     * Convert every entry of an array into an array of Unicode characters.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input, with the keys of the input preserved.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        /** @var string[][] $chunks */
        $chunks = \array_map(
            // "array_map()" with a single array keeps the original keys
            static function ($item) use ($length, $clean_utf8, $try_to_use_mb_functions) {
                return self::str_split($item, $length, $clean_utf8, $try_to_use_mb_functions);
            },
            $input
        );

        return $chunks;
    }
7728
7729
    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback: arrays are delegated to "str_split_array()"
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $mb_chunks = \mb_str_split($input, $length);
                if ($mb_chunks !== false) {
                    // "mb_str_split()" already honors the chunk length
                    return $mb_chunks;
                }
            }

            $char_count = \mb_strlen($input);
            if ($char_count <= 127) {
                // short input: pick the characters one by one
                $chars = [];
                for ($pos = 0; $pos < $char_count; ++$pos) {
                    $chars[] = \mb_substr($input, $pos, 1);
                }
            } else {
                $matches = [];
                \preg_match_all('/./us', $input, $matches);
                $chars = $matches[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $input, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand,
            // bytes that do not form a valid sequence are skipped

            $chars = [];
            $byte_count = \strlen($input);

            for ($pos = 0; $pos < $byte_count; ++$pos) {
                $lead = $input[$pos];

                if (($lead & "\x80") === "\x00") {
                    // single byte (ASCII)
                    $chars[] = $lead;
                } elseif (
                    isset($input[$pos + 1])
                    &&
                    ($lead & "\xE0") === "\xC0"
                ) {
                    // two byte sequence
                    if (($input[$pos + 1] & "\xC0") === "\x80") {
                        $chars[] = \substr($input, $pos, 2);

                        ++$pos;
                    }
                } elseif (
                    isset($input[$pos + 2])
                    &&
                    ($lead & "\xF0") === "\xE0"
                ) {
                    // three byte sequence
                    if (
                        ($input[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = \substr($input, $pos, 3);

                        $pos += 2;
                    }
                } elseif (
                    isset($input[$pos + 3])
                    &&
                    ($lead & "\xF8") === "\xF0"
                ) {
                    // four byte sequence
                    if (
                        ($input[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = \substr($input, $pos, 4);

                        $pos += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue the single characters together into chunks of "$length"
            return \array_map(
                static function (array $chunk): string {
                    return \implode('', $chunk);
                },
                \array_chunk($chars, $length)
            );
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
7882
7883
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * NOTE(review): the "mb_split()" branch interprets $pattern as a regex, while the
     * fallback branch passes it through "preg_quote()" and therefore matches it
     * literally - confirm which behavior is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $parts = \mb_split($pattern, $str);
            if ($parts === false) {
                return [];
            }

            // a non-negative limit keeps only the first "$limit" parts
            return $limit >= 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // ask for one extra part, so that the unsplit remainder can be dropped below
        $limit = $limit > 0 ? $limit + 1 : -1;

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
        if ($parts === false) {
            return [];
        }

        if ($limit > 0 && \count($parts) === $limit) {
            \array_pop($parts);
        }

        return $parts;
    }
7952
7953
    /**
     * Check if the string starts with the given substring (case-sensitive, byte-wise).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>An empty needle always matches; otherwise an empty haystack never matches.</p>
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // no native "str_starts_with()" before PHP 8: compare the leading bytes
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
7985
7986
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // an empty input or an empty candidate list can never match
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
8017
8018
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty,
     * or if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class' own substr wrapper here, exactly like the sibling
        // "*_separator" helpers do, instead of calling "mb_substr()" directly
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8059
8060
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty,
     * or if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_pos = \mb_strrpos($str, $separator);

            return $last_pos === false
                ? ''
                : (string) \mb_substr($str, $last_pos + (int) \mb_strlen($separator));
        }

        $last_pos = self::strrpos($str, $separator, 0, $encoding);
        if ($last_pos === false) {
            return '';
        }

        // skip the separator itself and take everything behind it
        $tail_start = $last_pos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tail_start, null, $encoding);
    }
8104
8105
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty,
     * or if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $first_pos = \mb_strpos($str, $separator);

            return $first_pos === false
                ? ''
                : (string) \mb_substr($str, 0, $first_pos);
        }

        $first_pos = self::strpos($str, $separator, 0, $encoding);
        if ($first_pos === false) {
            return '';
        }

        // everything in front of the separator
        return (string) self::substr($str, 0, $first_pos, $encoding);
    }
8150
8151
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty,
     * or if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_pos = \mb_strrpos($str, $separator);

            return $last_pos === false
                ? ''
                : (string) \mb_substr($str, 0, $last_pos);
        }

        $last_pos = self::strrpos($str, $separator, 0, $encoding);
        if ($last_pos === false) {
            return '';
        }

        // the encoding is normalized only after the position lookup
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $last_pos, $normalized_encoding);
    }
8195
8196
    /**
     * Returns the substring starting at the first occurrence of "$needle", or
     * the part in front of it when "$before_needle" is true.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strstr()'s third argument defaults to false, so the flag can be
        // forwarded as-is instead of branching on it.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8242
8243
    /**
     * Returns the substring starting at the last occurrence of "$needle", or
     * the part in front of it when "$before_needle" is true.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strrchr()'s third argument defaults to false, so the flag can be
        // forwarded as-is instead of branching on it.
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8289
8290
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>$str with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8305
8306
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that is neither whitespace nor one
     * of the characters listed in $word_define_chars.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be treated
     *                                                           like whitespace, i.e. as word separators.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain mb_* functions are only used when no language-specific
        // handling and no length-preserving conversion was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare explicitly instead of relying on truthiness: the string "0"
        // is falsy in PHP and would otherwise be dropped as a separator.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // words on the ignore list are returned untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8401
8402
    /**
     * Convert a string into an obfuscated string.
     *
     * Roughly "$percent" of the characters are picked at random; picked
     * characters are replaced by "$obfuscateChar" unless they are listed in
     * "$keepChars". Occurrences of "$obfuscateChar" that already exist in the
     * input are preserved.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('user@example.org', 0.5, '*', ['@', '.']); // e.g. "u**r@e*am**e.o*g"
     * </code>
     *
     * NOTE(review): the result is randomized via random_int(), which looks
     * inconsistent with the "@psalm-pure" annotation below - confirm.
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the characters to pick, e.g. 0.5 for 50%. The number of picked
     *                                characters never exceeds the number of characters that can be picked.</p>
     * @param string   $obfuscateChar
     * @param string[] $keepChars     <p>Characters that are never replaced (they still count as "picked").</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Temporarily swap pre-existing occurrences of the obfuscation char for
        // a placeholder, so they can be restored unchanged at the end.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Only chars that differ from the obfuscation char can ever be picked
        // by the loop below.
        $charsSelectable = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsSelectable;
            }
        }

        // Clamp the target: without this, a target above the number of
        // selectable chars (e.g. $percent > 1) can never be reached and the
        // while-loop below would spin forever.
        $charsMaxChange = \min(\round($charsMax * $percent), $charsSelectable);
        $charsCounter = 0;
        $charKeyDone = [];

        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // skip roughly every second candidate, so the picked chars
                // are spread randomly over the string
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // kept chars count as "picked" but are not replaced
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        // restore the obfuscation chars that were part of the original input
        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8468
8469
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The entries are appended to the internal
     *                         list of "small words" and become part of the regular expressions below.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // Built-in "small words" (regex fragments) followed by the caller's
        // additions; merging an empty $ignore leaves the list untouched.
        // NOTE(review): $ignore entries are inserted without preg_quote(), so
        // regex meta characters in them are interpreted - confirm this is wanted.
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-cases the first capture group via self::ucfirst().
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return self::ucfirst($matches[1], $encoding);
        };

        /**
         * Keeps the first capture group and upper-cases the second one via self::ucfirst().
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . self::ucfirst($matches[2], $encoding);
        };

        $str = \trim($str);

        // input without any lower-case char is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = self::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // leading (group 1) and trailing (group 6) underscores are preserved
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below contains an ellipsis character
        // although its inline comment talks about a hyphen - confirm the intent.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        return $str;
    }
8659
8660
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big-endian binary number:
     * every byte contributes 8 bits and leading zero bits of the whole result
     * are dropped. An empty string (or one consisting only of NUL bytes)
     * yields '0'.
     *
     * The conversion is done byte by byte, because base_convert() silently
     * loses precision once the number no longer fits into the internal
     * integer / float type, i.e. for inputs longer than a few bytes.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The binary string. "false" is only kept in the documented type for
     *                      backward compatibility, this implementation always returns a string.</p>
     */
    public static function str_to_binary(string $str)
    {
        $bits = '';
        $byte_count = \strlen($str);

        for ($i = 0; $i < $byte_count; ++$i) {
            // 8 bits per byte, zero-padded so inner bytes keep their leading zeros
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same format as before: no leading zeros, but never an empty result
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8683
8684
    /**
     * Splits a string into an array of lines, using "\r" and "\n" as line breaks.
     *
     * NOTE(review): the separator pattern matches one OR two consecutive
     * line-break chars, so "\n\n" (and "\r\r") is consumed as a single
     * separator and the empty line in between is not reported - confirm that
     * this is intended.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split": empty input or a failed split
        $fallback = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        // only filter when at least one of the two filters was requested
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8723
8724
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the unfiltered
     * result alternates between the text around the words and the words
     * themselves (see the example).
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split": empty input or a failed split
        $fallback = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        // no filter requested: hand back the raw split result
        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure every remaining value is a string (keys are preserved)
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
8775
8776
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If no room is left for any part of $str (i.e. $length minus the length
     * of $substring is zero or negative), only $substring is returned, which
     * is the same rule str_truncate_safe() applies. Previously the negative
     * length was handed to the substr function, which then counted from the
     * end of the string and could return more than $length characters.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // already short enough: nothing to truncate, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // reserve room for the appended substring
                $length -= (int) \mb_strlen($substring);
            }

            // a zero or negative length must not reach mb_substr(), where a
            // negative value means "stop that many chars before the end"
            if ($length <= 0) {
                return $substring;
            }

            return ((string) \mb_substr($str, 0, $length)) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // reserve room for the appended substring
            $length -= (int) self::strlen($substring, $encoding);
        }

        // see above: never pass a zero or negative length on
        if ($length <= 0) {
            return $substring;
        }

        return ((string) self::substr($str, 0, $length, $encoding)) . $substring;
    }
8835
8836
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * Only $substring is returned when $str is empty, when $length is not
     * positive, or when no room is left after reserving space for $substring.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // 'UTF-8' uses the native mb_* functions directly, everything else
        // goes through the class helpers with a normalized encoding name
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $str_length) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= $is_utf8
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = $is_utf8
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // a space right behind the cut means the last word is complete
        $space_position = $is_utf8
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);
        if ($space_position === $length) {
            return $truncated . $substring;
        }

        // the last word was truncated:
        // find pos of the last occurrence of a space, get up to that
        $last_position = $is_utf8
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        $cut_at_last_space = $last_position !== false
                             || ($space_position !== false && !$ignore_do_not_split_words_for_one_word);

        if ($cut_at_last_space) {
            // (int) false === 0, i.e. a single cut-off word is dropped entirely
            $truncated = $is_utf8
                ? (string) \mb_substr($truncated, 0, (int) $last_position)
                : (string) self::substr($truncated, 0, (int) $last_position, $encoding);
        }

        return $truncated . $substring;
    }
8942
8943
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is a shortcut for str_delimit() with "_" as the delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8960
8961
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is first passed to str_camelize() (with $str and $encoding
     * only) and the result is then handed to ucfirst() together with the
     * remaining options.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8988
8989
    /**
8990
     * Get the number of words in a specific string.
8991
     *
8992
     * EXAMPLES: <code>
8993
     * // format: 0 -> return only word count (int)
8994
     * //
8995
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
8996
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
8997
     *
8998
     * // format: 1 -> return words (array)
8999
     * //
9000
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
9001
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
9002
     *
9003
     * // format: 2 -> return words with offset (array)
9004
     * //
9005
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
9006
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
9007
     * </code>
9008
     *
9009
     * @param string $str       <p>The input string.</p>
9010
     * @param int    $format    [optional] <p>
9011
     *                          <strong>0</strong> => return a number of words (default)<br>
9012
     *                          <strong>1</strong> => return an array of words<br>
9013
     *                          <strong>2</strong> => return an array of words with word-offset as key
9014
     *                          </p>
9015
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
9016
     *
9017
     * @psalm-pure
9018
     *
9019
     * @return int|string[]
9020
     *                      <p>The number of words in the string.</p>
9021
     */
9022 2
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // The code below reads the words from the odd indexes (1, 3, 5, ...) of this list
        // and treats the even indexes as the text surrounding the words.
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        // Any format other than 1 or 2: only the number of words is returned.
        if ($format !== 1 && $format !== 2) {
            return (int) (($total - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            // Plain list of words.
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // Format 2: key every word by its character offset in the input string.
        $position = (int) self::strlen($parts[0]);
        for ($idx = 1; $idx < $total; $idx += 2) {
            $words[$position] = $parts[$idx];
            // Advance past the word itself and the text that follows it.
            $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
        }

        return $words;
    }
9046
9047
    /**
9048
     * Case-insensitive string comparison.
9049
     *
9050
     * INFO: Case-insensitive version of UTF8::strcmp()
9051
     *
9052
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
9053
     *
9054
     * @param string $str1     <p>The first string.</p>
9055
     * @param string $str2     <p>The second string.</p>
9056
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9057
     *
9058
     * @psalm-pure
9059
     *
9060
     * @return int
9061
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9062
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
9063
     *             <strong>0</strong> if they are equal
9064
     */
9065 23
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical options, then compare the folded strings.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9089
9090
    /**
9091
     * Case-sensitive string comparison.
9092
     *
9093
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
9094
     *
9095
     * @param string $str1 <p>The first string.</p>
9096
     * @param string $str2 <p>The second string.</p>
9097
     *
9098
     * @psalm-pure
9099
     *
9100
     * @return int
9101
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9102
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9103
     *             <strong>0</strong> if they are equal
9104
     */
9105 29
    public static function strcmp(string $str1, string $str2): int
    {
        // Identical byte sequences are equal without any normalization work.
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the NFD (canonically decomposed) forms, so that canonically
        // equivalent strings compare as equal.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false on failure (e.g. invalid UTF-8).
        // Passing that to \strcmp() under strict_types would throw a TypeError,
        // so fall back to the raw input for the operand that could not be normalized.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
9118
9119
    /**
9120
     * Find length of initial segment not matching mask.
9121
     *
9122
     * @param string   $str
9123
     * @param string   $char_list
9124
     * @param int      $offset
9125
     * @param int|null $length
9126
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9127
     *
9128
     * @psalm-pure
9129
     *
9130
     * @return int
9131
     */
9132 12
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Restrict the subject to the requested segment first, so that every
        // result below (including the empty-mask case) is measured on that segment.
        if ($offset || $length !== null) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str_tmp = \mb_substr($str, $offset);
                } else {
                    $str_tmp = \mb_substr($str, $offset, $length);
                }
            } else {
                $str_tmp = self::substr($str, $offset, $length, $encoding);
            }

            if ($str_tmp === false) {
                return 0;
            }

            $str = $str_tmp;
        }

        if ($str === '') {
            return 0;
        }

        // An empty mask can never match, so the whole (already sliced) segment counts.
        // This check used to run before the slicing and therefore ignored $offset / $length.
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Lazily capture everything in front of the first character from the mask.
        $matches = [];
        if (\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $matches)) {
            // strlen() may return false; the cast turns that into 0.
            return (int) self::strlen($matches[1], $encoding);
        }

        // No character of the mask occurs: the complete segment is the answer.
        return (int) self::strlen($str, $encoding);
    }
9181
9182
    /**
9183
     * Create a UTF-8 string from code points.
9184
     *
9185
     * INFO: opposite to UTF8::codepoints()
9186
     *
9187
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
9188
     *
9189
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
9190
     *
9191
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
9192
     *
9193
     * @psalm-pure
9194
     *
9195
     * @return string
9196
     *                <p>A UTF-8 encoded string.</p>
9197
     */
9198 4
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // Accept a single value as well as a list of values.
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // Write every value as a decimal numeric character reference ("&#NNN;").
        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        return mb_convert_encoding($entities, 'UTF-8', 'HTML-ENTITIES');
    }
9217
9218
    /**
9219
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
9220
     *
9221
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
9222
     *
9223
     * @param string $str <p>The input string.</p>
9224
     *
9225
     * @psalm-pure
9226
     *
9227
     * @return bool
9228
     *              <p>
9229
     *              <strong>true</strong> if the string has BOM at the start,<br>
9230
     *              <strong>false</strong> otherwise
9231
     *              </p>
9232
     */
9233 40
    public static function string_has_bom(string $str): bool
    {
        // self::$BOM maps every known BOM byte sequence to the number of bytes to compare.
        // Iterate by value: the table is only read here, and a by-reference loop
        // would leave a dangling reference to the last element of the static array.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // Binary-safe comparison of the leading bytes of the input with the BOM.
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9243
9244
    /**
9245
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
9246
     *
9247
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
9248
     *
9249
     * @see http://php.net/manual/en/function.strip-tags.php
9250
     *
9251
     * @param string      $str            <p>
9252
     *                                    The input string.
9253
     *                                    </p>
9254
     * @param string|null $allowable_tags [optional] <p>
9255
     *                                    You can use the optional second parameter to specify tags which should
9256
     *                                    not be stripped.
9257
     *                                    </p>
9258
     *                                    <p>
9259
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
9260
     *                                    can not be changed with allowable_tags.
9261
     *                                    </p>
9262
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9263
     *
9264
     * @psalm-pure
9265
     *
9266
     * @return string
9267
     *                <p>The stripped string.</p>
9268
     */
9269 4
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // Optionally clean the input before the tags are removed.
        $input = $clean_utf8 ? self::clean($str) : $str;

        // Only pass the second argument to the native function when the caller supplied one.
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9288
9289
    /**
9290
     * Strip all whitespace characters. This includes tabs and newline
9291
     * characters, as well as multibyte whitespace such as the thin space
9292
     * and ideographic space.
9293
     *
9294
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
9295
     *
9296
     * @param string $str
9297
     *
9298
     * @psalm-pure
9299
     *
9300
     * @return string
9301
     */
9302 36
    public static function strip_whitespace(string $str): string
    {
        // Nothing to do for an empty string; otherwise drop every run of
        // characters matched by the [[:space:]] class (pattern is in "u" mode).
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9310
9311
    /**
9312
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
9313
     *
9314
     * INFO: use UTF8::stripos_in_byte() for the byte-length
9315
     *
9316
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
9317
     *
9318
     * @see http://php.net/manual/en/function.mb-stripos.php
9319
     *
9320
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9321
     * @param string $needle     <p>The string to find in haystack.</p>
9322
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
9323
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9324
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9325
     *
9326
     * @psalm-pure
9327
     *
9328
     * @return false|int
9329
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
9330
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
9331
     */
9332 25
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty haystack: only an empty needle on PHP >= 8 counts as a match (at position 0).
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        // Before PHP 8 an empty needle is never found.
        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_stripos($haystack, $needle, $offset)
                : \mb_stripos($haystack, $needle, $offset, self::normalize_encoding($encoding, 'UTF-8'));
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $intl_usable = $encoding === 'UTF-8'
                       && $offset >= 0
                       && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            // a "false" result is not returned here, the next fallbacks are tried instead
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings and search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9400
9401
    /**
9402
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
9403
     *
9404
     * EXAMPLE: <code>
9405
     * $str = 'iñtërnâtiônàlizætiøn';
9406
     * $search = 'NÂT';
9407
     *
9408
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
9409
     * UTF8::stristr($str, $search, true)); // 'iñtër'
9410
     * </code>
9411
     *
9412
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
9413
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
9414
     * @param bool   $before_needle [optional] <p>
9415
     *                              If <b>TRUE</b>, it returns the part of the
9416
     *                              haystack before the first occurrence of the needle (excluding the needle).
9417
     *                              </p>
9418
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9419
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
9420
     *
9421
     * @psalm-pure
9422
     *
9423
     * @return false|string
9424
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
9425
     */
9426 13
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty haystack: only an empty needle on PHP >= 8 yields a (likewise empty) result.
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Empty needle (checked after the optional cleaning): whole haystack on PHP >= 8, else "not found".
        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_stristr($haystack, $needle, $before_needle)
                : \mb_stristr($haystack, $needle, $before_needle, self::normalize_encoding($encoding, 'UTF-8'));
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $is_utf8 = $encoding === 'UTF-8';

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stristr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $found = \grapheme_stristr($haystack, $needle, $before_needle);
            // a "false" result is not returned here, the next fallbacks are tried instead
            if ($found !== false) {
                return $found;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via regex: lazily capture everything in front of the first case-insensitive hit
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];
        if ($before_needle) {
            return $prefix;
        }

        // The result starts right after the captured prefix, i.e. at the needle.
        return self::substr($haystack, (int) self::strlen($prefix, $encoding), null, $encoding);
    }
9506
9507
    /**
9508
     * Get the string length, not the byte-length!
9509
     *
9510
     * INFO: use UTF8::strwidth() for the char-length
9511
     *
9512
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
9513
     *
9514
     * @see http://php.net/manual/en/function.mb-strlen.php
9515
     *
9516
     * @param string $str        <p>The string being checked for length.</p>
9517
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9518
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9519
     *
9520
     * @psalm-pure
9521
     *
9522
     * @return false|int
9523
     *                   <p>
9524
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
9525
     *                   $encoding.
9526
     *                   (One multi-byte character counted as +1).
9527
     *                   <br>
9528
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
9529
     *                   chars.
9530
     *                   </p>
9531
     */
9532 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $is_utf8 ? @\mb_strlen($str) : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        // Warn when neither mbstring nor iconv can deal with the requested non-UTF-8 encoding.
        if (
            !$is_utf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            // on failure the next fallbacks are tried
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            // only a "null" result moves on to the next fallback
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the characters matched by the regex
        //

        \preg_match_all('/./us', $str, $parts);
        $char_count = \count($parts[0]);

        // The string is known to be non-empty here, so zero matches is reported as a failure.
        return $char_count === 0 ? false : $char_count;
    }
9637
9638
    /**
9639
     * Get string length in byte.
9640
     *
9641
     * @param string $str
9642
     *
9643
     * @psalm-pure
9644
     *
9645
     * @return int
9646
     */
9647 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with the 8-bit "CP850" encoding;
        // otherwise the native strlen() is called.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
9660
9661
    /**
9662
     * Case-insensitive string comparisons using a "natural order" algorithm.
9663
     *
9664
     * INFO: natural order version of UTF8::strcasecmp()
9665
     *
9666
     * EXAMPLES: <code>
9667
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
9668
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9669
     *
9670
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9671
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9672
     * </code>
9673
     *
9674
     * @param string $str1     <p>The first string.</p>
9675
     * @param string $str2     <p>The second string.</p>
9676
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9677
     *
9678
     * @psalm-pure
9679
     *
9680
     * @return int
9681
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9682
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9683
     *             <strong>0</strong> if they are equal
9684
     */
9685 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical options, then compare them in natural order.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
9692
9693
    /**
9694
     * String comparisons using a "natural order" algorithm
9695
     *
9696
     * INFO: natural order version of UTF8::strcmp()
9697
     *
9698
     * EXAMPLES: <code>
9699
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
9700
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9701
     *
9702
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9703
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9704
     * </code>
9705
     *
9706
     * @see http://php.net/manual/en/function.strnatcmp.php
9707
     *
9708
     * @param string $str1 <p>The first string.</p>
9709
     * @param string $str2 <p>The second string.</p>
9710
     *
9711
     * @psalm-pure
9712
     *
9713
     * @return int
9714
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9715
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9716
     *             <strong>0</strong> if they are equal
9717
     */
9718 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Identical strings need no folding at all.
        if ($str1 === $str2) {
            return 0;
        }

        // Fold both operands with strtonatfold() and let the native natural-order comparison decide.
        $left = (string) self::strtonatfold($str1);
        $right = (string) self::strtonatfold($str2);

        return \strnatcmp($left, $right);
    }
9729
9730
    /**
9731
     * Case-insensitive string comparison of the first n characters.
9732
     *
9733
     * EXAMPLE: <code>
9734
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
9735
     * </code>
9736
     *
9737
     * @see http://php.net/manual/en/function.strncasecmp.php
9738
     *
9739
     * @param string $str1     <p>The first string.</p>
9740
     * @param string $str2     <p>The second string.</p>
9741
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9742
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9743
     *
9744
     * @psalm-pure
9745
     *
9746
     * @return int
9747
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9748
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9749
     *             <strong>0</strong> if they are equal
9750
     */
9751 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical options ...
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... and compare only their first $len characters.
        return self::strncmp($folded1, $folded2, $len);
    }
9763
9764
    /**
9765
     * String comparison of the first n characters.
9766
     *
9767
     * EXAMPLE: <code>
9768
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
9769
     * </code>
9770
     *
9771
     * @see http://php.net/manual/en/function.strncmp.php
9772
     *
9773
     * @param string $str1     <p>The first string.</p>
9774
     * @param string $str2     <p>The second string.</p>
9775
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9776
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9777
     *
9778
     * @psalm-pure
9779
     *
9780
     * @return int
9781
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9782
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9783
     *             <strong>0</strong> if they are equal
9784
     */
9785 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Other encodings: cut both strings with the encoding-aware substr() of this class.
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);

            return self::strcmp($head1, $head2);
        }

        // UTF-8: cut both strings directly via mbstring.
        $head1 = (string) \mb_substr($str1, 0, $len);
        $head2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($head1, $head2);
    }
9805
9806
    /**
9807
     * Search a string for any of a set of characters.
9808
     *
9809
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
9810
     *
9811
     * @see http://php.net/manual/en/function.strpbrk.php
9812
     *
9813
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9814
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9815
     *
9816
     * @psalm-pure
9817
     *
9818
     * @return false|string
9819
     *                      <p>The string starting from the character found, or false if it is not found.</p>
9820
     */
9821 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Look for the first character of the haystack that is part of the character list.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Return the haystack from the (byte) position of the matched character onwards.
        $byte_position = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $byte_position);
    }
9833
9834
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * The implementation tries several back ends in order: mbstring, plain strpos() for
     * "CP850" / "ASCII", intl (grapheme_strpos), iconv, plain strpos() for pure ASCII input
     * and finally a byte-search whose result is converted into a character position.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.
     *                               A negative offset counts from the end of the haystack.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string, always counted from the beginning of the haystack.<br>
     *                   If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // before PHP 8 an empty haystack never matches anything
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only reachable on PHP >= 8, where an empty needle is found at position 0
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //
        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // The haystack is cut down to the search window below, so remember at which absolute
        // character position that window starts. A negative offset counts from the end of the
        // haystack; resetting it to 0 would make the result relative to the window instead of
        // the whole haystack.
        $window_start = $offset;
        if ($offset < 0) {
            $window_start = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the search window
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character count and shift it by the window start
            return $window_start + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $window_start;
    }
10007
10008
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty haystack or needle can never produce a position
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // the common case: no mbstring function overloading, the native function is used
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so call it with an 8-bit charset
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
10040
10041
    /**
     * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty haystack or needle can never produce a position
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // the common case: no mbstring function overloading, the native function is used
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so call it with an 8-bit charset
        return \mb_stripos($haystack, $needle, $offset, 'CP850');
    }
10073
10074
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * INFO: without mbstring only the first character of the needle is searched for.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the portion of haystack that is returned.
     *                              If true, everything from the beginning of haystack
     *                              up to the last occurrence of needle is returned.
     *                              If false, everything from the last occurrence of
     *                              needle to the end of haystack is returned.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred back end: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only: the native function is sufficient (it has no "before needle" mode)
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // iconv and vanilla php share the same recipe: reduce the needle to its first
        // character, locate its last occurrence and cut the haystack at that position
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $first_char, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $first_char, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        if ($before_needle) {
            return self::substr($haystack, 0, $last_pos, $encoding);
        }

        return self::substr($haystack, $last_pos, null, $encoding);
    }
10204
10205
    /**
     * Reverses characters order in the string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * The input is passed through self::emoji_encode() before it is reversed and through
     * self::emoji_decode() afterwards (presumably so that emoji are not torn apart - see those helpers).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);

        // collects the characters, last one first
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            // no intl: walk the string code point by code point
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10262
10263
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * INFO: without mbstring only the first character of the needle is searched for.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the portion of haystack that is returned.
     *                              If true, everything from the beginning of haystack
     *                              up to the last occurrence of needle is returned.
     *                              If false, everything from the last occurrence of
     *                              needle to the end of haystack is returned.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred back end: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // vanilla php: reduce the needle to its first character, locate its last
        // case-insensitive occurrence and cut the haystack at that position
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        if ($before_needle) {
            return self::substr($haystack, 0, $last_pos, $encoding);
        }

        return self::substr($haystack, $last_pos, null, $encoding);
    }
10343
10344
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The implementation tries several back ends in order: mbstring, plain strripos() for
     * "CP850" / "ASCII", intl (grapheme_strripos), plain strripos() for pure ASCII input
     * and finally UTF8::strrpos() on the case-folded haystack and needle.
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>Or a non-negative code point as int.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // before PHP 8 an empty haystack never matches anything
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only reachable on PHP >= 8, where an empty needle is found at position 0
            return \PHP_VERSION_ID >= 80000 && $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred back end: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only: the native function is sufficient
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // intl: "grapheme_strripos()" can handle neither other encodings nor a negative offset
        //

        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // pure ascii input: the native function is sufficient
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // vanilla php: case-fold both strings and run the case-sensitive search
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10475
10476
    /**
     * Find the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack
     *                   or needle is empty.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty haystack or needle can never produce a position
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // the common case: no mbstring function overloading, the native function is used
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so call it with an 8-bit charset
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
10510
10511
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The implementation tries several back ends in order: mbstring, plain strrpos() for
     * "CP850" / "ASCII", intl (grapheme_strrpos), plain strrpos() for pure ASCII input
     * and finally a byte-search whose result is converted into a character position.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a non-negative code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // before PHP 8 an empty haystack never matches anything
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only reachable on PHP >= 8, where an empty needle is found at position 0
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Narrow the haystack to the search window. The requested $encoding is handed on to the
        // helpers (as UTF8::strpos() does in its fallback) instead of relying on their defaults.
        $haystack_tmp = null;
        if ($offset > 0) {
            // skip the first $offset characters; $offset is added back to the result below
            $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last |$offset| characters; positions still count from the start
            $haystack_tmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystack_tmp !== null) {
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        // byte position of the last occurrence inside the search window
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // convert the byte position into a character count and shift it by the skipped prefix
        return $offset + (int) self::strlen($str_tmp, $encoding);
    }
10670
10671
    /**
     * Find the byte position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty haystack or needle can never produce a position
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // the common case: no mbstring function overloading, the native function is used
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so call it with an 8-bit charset
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
10705
10706
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // '3'</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the part of the string that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment, 0 if the (sliced) string or the mask is empty
     *                   or if nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut the string down to the requested window first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the leading run of mask-characters
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
10753
10754
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // an empty needle in an empty haystack is only a match for PHP >= 8.0
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            // an empty needle matches the whole haystack for PHP >= 8.0 only
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            self::$SUPPORT['intl'] === true
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        ) {
            $intl_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intl_result !== false) {
                return $intl_result;
            }
            // on false we keep going and try the remaining fallbacks
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // group 1 lazily captures everything in front of the first occurrence of the needle
        $parts = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $parts);

        if (!isset($parts[1])) {
            return false;
        }

        return $before_needle
            ? $parts[1]
            : self::substr($haystack, (int) self::strlen($parts[1]));
    }
10887
10888
    /**
     * Finds first occurrence of a string within another (byte-wise).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (or if haystack or needle is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // no function overloading detected: use the plain php function
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
10929
10930
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove the invalid characters before any case conversion is applied
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // shortcut: plain UTF-8 without language-specific rules goes directly to "mb_*"
        $use_mb_directly = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $use_mb_directly
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $use_mb_directly
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10986
10987
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove the invalid characters before the case conversion is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language-specific rules
        if ($encoding === 'UTF-8' && $lang === null) {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language-specific rules requested
        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language-specific rules need the transliterator from intl
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language independent transliterator
            $transliterator_id = 'Any-Lower';
        }

        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11066
11067
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove the invalid characters before the case conversion is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language-specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available in this branch, the language id itself is the problem,
                    // so the warning is aligned with the one from UTF8::strtolower()
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language independent transliterator
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11146
11147
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical search and replacement: nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // strings are split via UTF8::str_split(), arrays are used as they are
            $from = \is_array($from) ? $from : self::str_split($from);
            $to = \is_array($to) ? $to : self::str_split($to);

            // trim the longer list, so that both lists have the same number of elements
            $from_count = \count($from);
            $to_count = \count($to);
            if ($from_count > $to_count) {
                $from = \array_slice($from, 0, $to_count);
            } elseif ($from_count < $to_count) {
                $to = \array_slice($to, 0, $from_count);
            }

            // build the "search => replacement" map
            $from = \array_combine($from, $to);
            if ($from === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }
        }

        // a string in "$from" is only left here if "$to" is the empty string
        return \is_string($from)
            ? \str_replace($from, $to, $str)
            : \strtr($str, $from);
    }
11213
11214
    /**
     * Return the width of a string.
     *
     * Uses "mb_strwidth()" if mbstring is available; otherwise every character matched by the
     * wide-character pattern below counts as two columns and the remaining characters are
     * counted via UTF8::strlen().
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the pattern below works on UTF-8 input
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the wide characters and remember how many of them were removed
        $wide_chars = 0;
        $narrow_str = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_chars);

        // wide characters take two columns, everything else one column
        return (2 * $wide_chars) + (int) self::strlen($narrow_str);
    }
11273
11274
    /**
11275
     * Get part of a string.
11276
     *
11277
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
11278
     *
11279
     * @see http://php.net/manual/en/function.mb-substr.php
11280
     *
11281
     * @param string   $str        <p>The string being checked.</p>
11282
     * @param int      $offset     <p>The first position used in str.</p>
11283
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
11284
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11285
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11286
     *
11287
     * @psalm-pure
11288
     *
11289
     * @return false|string
11290
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
11291
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
11292
     *                      characters long, <b>FALSE</b> will be returned.
11293
     */
11294 172
    public static function substr(
11295
        string $str,
11296
        int $offset = 0,
11297
        int $length = null,
11298
        string $encoding = 'UTF-8',
11299
        bool $clean_utf8 = false
11300
    ) {
11301
        // empty string
11302 172
        if ($str === '' || $length === 0) {
11303 8
            return '';
11304
        }
11305
11306 168
        if ($clean_utf8) {
11307
            // iconv and mbstring are not tolerant to invalid encoding
11308
            // further, their behaviour is inconsistent with that of PHP's substr
11309 2
            $str = self::clean($str);
11310
        }
11311
11312
        // whole string
11313 168
        if (!$offset && $length === null) {
11314 7
            return $str;
11315
        }
11316
11317 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
11318 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
11319
        }
11320
11321
        //
11322
        // fallback via mbstring
11323
        //
11324
11325 163
        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
11326 161
            if ($length === null) {
11327 64
                return \mb_substr($str, $offset);
11328
            }
11329
11330 102
            return \mb_substr($str, $offset, $length);
11331
        }
11332
11333
        //
11334
        // fallback for binary || ascii only
11335
        //
11336
11337
        if (
11338 4
            $encoding === 'CP850'
11339
            ||
11340 4
            $encoding === 'ASCII'
11341
        ) {
11342
            if ($length === null) {
11343
                return \substr($str, $offset);
11344
            }
11345
11346
            return \substr($str, $offset, $length);
11347
        }
11348
11349
        // otherwise we need the string-length
11350 4
        $str_length = 0;
11351 4
        if ($offset || $length === null) {
11352 4
            $str_length = self::strlen($str, $encoding);
11353
        }
11354
11355
        // e.g.: invalid chars + mbstring not installed
11356 4
        if ($str_length === false) {
11357
            return false;
11358
        }
11359
11360
        // empty string
11361 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
11362
            return '';
11363
        }
11364
11365
        // impossible
11366 4
        if ($offset && $offset > $str_length) {
11367
            return '';
11368
        }
11369
11370 4
        $length = $length ?? $str_length;
11371
11372
        if (
11373 4
            $encoding !== 'UTF-8'
11374
            &&
11375 4
            self::$SUPPORT['mbstring'] === false
11376
        ) {
11377
            /**
11378
             * @psalm-suppress ImpureFunctionCall - is is only a warning
11379
             */
11380 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
11381
        }
11382
11383
        //
11384
        // fallback via intl
11385
        //
11386
11387
        if (
11388 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
11389
            &&
11390 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
11391
            &&
11392 4
            self::$SUPPORT['intl'] === true
11393
        ) {
11394
            $return_tmp = \grapheme_substr($str, $offset, $length);
11395
            if ($return_tmp !== false) {
11396
                return $return_tmp;
11397
            }
11398
        }
11399
11400
        //
11401
        // fallback via iconv
11402
        //
11403
11404
        if (
11405 4
            $length >= 0 // "iconv_substr()" can't handle negative length
11406
            &&
11407 4
            self::$SUPPORT['iconv'] === true
11408
        ) {
11409
            $return_tmp = \iconv_substr($str, $offset, $length);
11410
            if ($return_tmp !== false) {
11411
                return $return_tmp;
11412
            }
11413
        }
11414
11415
        //
11416
        // fallback for ascii only
11417
        //
11418
11419 4
        if (ASCII::is_ascii($str)) {
11420
            return \substr($str, $offset, $length);
11421
        }
11422
11423
        //
11424
        // fallback via vanilla php
11425
        //
11426
11427
        // split to array, and remove invalid characters
11428
        // &&
11429
        // extract relevant part, and join to make sting again
11430 4
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
11431
    }
11432
11433
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * When an offset or a length is given, $str1 is reduced to that window first and
     * $str2 is then cut to the character length of the reduced $str1, so both sides
     * cover the same number of characters before they are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if ($offset !== 0 || $length !== null) {
            if ($encoding === 'UTF-8') {
                // fast path: call "mb_substr()" directly with its default charset
                $str1 = $length === null
                    ? (string) \mb_substr($str1, $offset)
                    : (string) \mb_substr($str1, $offset, $length);

                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure $str1 in the same charset that is used to slice $str2,
                // otherwise the two windows can end up with different sizes
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11494
11495
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. The haystack is cut with "mb_substr()" before counting,
     *                             so a negative value is interpreted the way "mb_substr()" interprets it.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or <strong>false</strong> if the needle is empty
     *                   or the length of the haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in, or an explicitly empty search window
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window; "$length !== null" (instead of
        // "$length > 0") makes sure a negative length is also applied when $offset is 0
        if ($offset || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: one match-set per occurrence
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11592
11593
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string. An empty haystack or needle gives 0.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1,
                // and PHP 7.1.0 is 70100 in "PHP_VERSION_ID" (major * 10000 + minor * 100 + release)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11674
11675
    /**
     * Counts how often $substring occurs in $str.
     *
     * The search is case-sensitive unless $case_sensitive is set to false; in that
     * case both strings are converted to a common case before counting.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences; 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // plain UTF-8: use the "mb_" functions with their default charset
        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
11722
11723
    /**
     * Strips $needle from the start of $haystack, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not
     *                start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11755
11756
    /**
     * Get part of a string process in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string;
     *                         <strong>null</strong> means "up to the end of the string".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty <i>str</i> or a <i>length</i> of 0
     *                      gives an empty string. Depending on the PHP version, the underlying
     *                      function may return <b>FALSE</b> if <i>offset</i> is out of range.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // omit the length instead of passing a fixed 32-bit maximum,
        // so that very long strings are not cut on 64-bit systems
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
11789
11790
    /**
     * Strips $needle from the end of $haystack, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not
     *                end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // number of leading characters that survive once the suffix is removed
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
11822
11823
    /**
     * Strips $needle from the start of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not
     *                start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11855
11856
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, every entry is processed on its own (with the given $encoding)
     * and scalar $replacement / $offset / $length values are applied to each entry.
     * Note that for an array $str a missing $length is treated as 0 (insert), as shown
     * in the example above, while for a single string it means "up to the end".
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     *
     * @template TSubstrReplace
     * @phpstan-param TSubstrReplace $str
     * @phpstan-return TSubstrReplace
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, one entry at a time; the closure forwards $encoding,
            // which would otherwise silently fall back to the default charset
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string can only take a single replacement: use the first one
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing / oversized length into a plain character count
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12014
12015
    /**
     * Strips $needle from the end of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not
     *                end with the needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // the suffix check itself is done on the raw bytes
        $has_suffix = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$has_suffix) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
12064
12065
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * The result is built by XOR-ing the lower-cased string, the upper-cased string
     * and the input: wherever the input equals one variant, the other variant remains.
     *
     * NOTE(review): the byte-wise XOR presumably relies on both case variants having
     * the same byte length as the input - confirm for multi-byte case mappings.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        return (string) ($lower ^ $upper ^ $str);
    }
12097
12098
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when an "mbstring" or "iconv" function exists although
     * the corresponding PHP extension is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12125
12126
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        return \str_replace("\t", \str_repeat(' ', $tab_length), $str);
    }
12146
12147
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without the "keep string length" option the work is done
     * by "mb_convert_case()"; otherwise it is delegated to self::str_titleize().
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific handling or length preservation is requested
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE, self::normalize_encoding($encoding, 'UTF-8'));
    }
12201
12202
    /**
     * Transliterates a string into ASCII (delegates to "ASCII::to_transliterate()").
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Replacement for characters that are unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
12223
12224
    /**
     * Interprets a scalar as a boolean.
     *
     * Lookup order: the empty string is false; "true", "1", "on", "yes" are true and
     * "false", "0", "off", "no" are false (case-insensitive); any other numeric string
     * is true only if it is greater than zero; everything else is the boolean cast of
     * the trimmed string.
     *
     * @param bool|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        if (\is_numeric($value)) {
            return ((float) $value) > 0;
        }

        return (bool) \trim($value);
    }
12267
12268
    /**
     * Turns the given string into a safe filename while keeping the string case
     * (delegates to "ASCII::to_filename()").
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12291
12292
    /**
     * Converts a string, or every element of an array, into "ISO-8859"-encoding (Latin-1).
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @template TToIso8859
     * @phpstan-param TToIso8859 $str
     * @phpstan-return TToIso8859
     */
    public static function to_iso8859($str)
    {
        if (!\is_array($str)) {
            $input = (string) $str;

            return $input === '' ? '' : self::utf8_decode($input);
        }

        // arrays are converted element by element (recursively), keys are kept
        foreach ($str as $key => $value) {
            $str[$key] = self::to_iso8859($value);
        }

        return $str;
    }
12324
12325
    /**
     * Converts a string, or every element of an array of strings, to UTF-8 via
     * "UTF8::to_utf8_string()". Already valid UTF-8 characters are left alone.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        // convert the array element by element, keys are kept
        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $str */
        return $str;
    }
12365
12366
    /**
     * Converts a string to UTF-8 while leaving byte sequences that already look like
     * UTF-8 untouched.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $buf = '';

        for ($i = 0; $i < $length; ++$i) {
            $lead = $str[$i];

            // 7-bit byte: it doesn't need conversion
            if ($lead < "\x80") {
                $buf .= $lead;

                continue;
            }

            // number of continuation bytes announced by the lead byte,
            // 0 === the byte can not start a UTF-8 sequence
            if ($lead < "\xC0") {
                $tail = 0; // stray continuation byte (0x80 - 0xBF)
            } elseif ($lead <= "\xDF") {
                $tail = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $tail = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $tail = 3; // looks like 4 bytes UTF8
            } else {
                $tail = 0; // 0xF8 - 0xFF
            }

            // every announced continuation byte must exist and be in the range 0x80 - 0xBF
            $looks_like_utf8 = $tail > 0;
            for ($k = 1; $looks_like_utf8 && $k <= $tail; ++$k) {
                $next = $i + $k >= $length ? "\x00" : $str[$i + $k];
                $looks_like_utf8 = $next >= "\x80" && $next <= "\xBF";
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence
                $buf .= \substr($str, $i, $tail + 1);
                $i += $tail;
            } else {
                // not valid UTF8 - convert the single byte
                $buf .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $code_point = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // 0x80 - 0x9F: build the two-byte sequence by hand
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6))
                           . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $buf
        );

        // "preg_replace_callback()" returns null on error
        if ($buf === null) {
            return '';
        }

        // decode UTF-8 codepoints
        return $decode_html_entity_to_utf8 ? self::html_entity_decode($buf) : $buf;
    }
12497
12498
    /**
     * Casts a numeric string to an integer.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>The integer value, or null if the string isn't numeric.</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
12516
12517
    /**
     * Casts the input to a string when that is safely possible: strings, integers,
     * floats and objects implementing "__toString()". Everything else yields null.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        /** @var string $input_type - hack for psalm */
        $input_type = \gettype($input);

        switch ($input_type) {
            case 'string':
            case 'integer':
            case 'float':
            case 'double':
                return (string) $input;

            case 'object':
                /** @phpstan-ignore-next-line - "gettype": FP? */
                return \method_exists($input, '__toString') ? (string) $input : null;

            default:
                // null, bool, array, resource, ...
                return null;
        }
    }
12556
12557
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original function if the string / chars are <= 7-Bit,
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // "mb_ereg_replace()" takes the pattern without delimiters,
                // hence no delimiter argument for "preg_quote()"
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without the mbstring extension
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
12602
12603
    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with its first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters precede the needle, so clean up first
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are sufficient unless a language hint
        // or the length-preserving mode was requested
        $plain_mb_is_enough = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            if ($plain_mb_is_enough) {
                return \mb_strtoupper($first_char) . $rest;
            }

            return self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_is_enough) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first_char, $encoding) . $rest;
        }

        $first_char = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $rest;
    }
12679
12680
    /**
     * Uppercase for all words in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: a plain truthiness check would also reject the string "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters precede the needle, so clean up first
            $str = self::clean($str);
        }

        // without extra word-chars / exceptions, pure ASCII input can use the native function
        $use_php_default_functions = !(bool) ($char_list . \implode('', $exceptions));

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach (self::str_to_words($str, $char_list) as $word) {
            // skip only real empty parts, a "0" word must be kept
            if ($word === '') {
                continue;
            }

            if (
                $use_exceptions
                &&
                \in_array($word, $exceptions, true)
            ) {
                // excluded words are copied unchanged
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
12748
12749
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding pass is always done; with $multi_decode the pass is
        // repeated until the string doesn't change anymore
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
12808
12809
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Characters that cannot be represented in ISO-8859-1 (code points above 0xFF,
     * i.e. every 3- and 4-byte sequence and some 2-byte sequences) are replaced by "?".
     * A 2-byte lead byte that is cut off at the very end of the input is replaced by "?"
     * as well, instead of reading past the end of the string.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>If true and the decoded string is not shorter (in characters) than the
     *                                input, the input is returned unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        $len = \strlen($str);

        // lazy-load the byte <-> number lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        $decoded = '';
        for ($i = 0; $i < $len; ++$i) {
            $byte = $str[$i];

            // the high nibble of the lead byte tells the length of the sequence
            switch ($byte & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence: the continuation byte is missing
                        $decoded .= $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$byte & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $decoded .= $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                case "\xF0":
                    // 4-byte sequence: skip one byte more than for 3 bytes
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte sequence: never representable in ISO-8859-1
                    $decoded .= $no_char_found;
                    $i += 2;

                    break;

                default:
                    // everything else is copied byte by byte
                    $decoded .= $byte;
            }
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($decoded) >= (int) self::strlen($str)
        ) {
            return $str;
        }

        return $decoded;
    }
12881
12882
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * INFO: "mb_convert_encoding()" is used because the native "utf8_encode()"
     * is deprecated since PHP 8.2.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the conversion (or a polyfill of it) may return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
12908
12909
    /**
     * Returns the internal table of utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12924
12925
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace followed by at most $limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // no match at all: hand the input back untouched
        if (!isset($matches[0])) {
            return $str;
        }

        $head = $matches[0];

        // the match already spans the whole string: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12959
12960
    /**
12961
     * Wraps a string to a given number of characters
12962
     *
12963
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
12964
     *
12965
     * @see http://php.net/manual/en/function.wordwrap.php
12966
     *
12967
     * @param string $str   <p>The input string.</p>
12968
     * @param int    $width [optional] <p>The column width.</p>
12969
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
12970
     * @param bool   $cut   [optional] <p>
12971
     *                      If the cut is set to true, the string is
12972
     *                      always wrapped at or before the specified width. So if you have
12973
     *                      a word that is larger than the given width, it is broken apart.
12974
     *                      </p>
12975
     *
12976
     * @psalm-pure
12977
     *
12978
     * @return string
12979
     *                <p>The given string wrapped at the specified column.</p>
12980
     */
12981 12
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        // Build two parallel views of the input:
        //  - $chars: the real characters (an existing $break is stored as ONE element)
        //  - $mask:  a single-byte stand-in per element ('?' = non-space, ' ' = space, '#' = break),
        //            so that the native, byte-based wordwrap() can decide where lines end.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_index => $segment) {
            if ($segment_index !== 0) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;  // position in $chars
        $mask_index = -1; // position in $mask
        $break_pos = -1;  // position of the current '#' in $mask

        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy every character that sits in front of the current break marker
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is an error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced an existing break or a space: drop that element,
            // otherwise the marker was inserted (cut) and nothing is consumed
            if (
                $break === $chars[$char_index]
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is an error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed belongs to the last line
        return $wrapped . \implode('', $chars);
    }
13052
13053
    /**
13054
     * Line-wrap the string after $width characters, but split the string by "$delimiter" before ...
13055
     *    ... so that each line is wrapped on its own.
13056
     *
13057
     * @param string      $str             <p>The input string.</p>
13058
     * @param int         $width           [optional] <p>The column width.</p>
13059
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
13060
     * @param bool        $cut             [optional] <p>
13061
     *                                     If the cut is set to true, the string is
13062
     *                                     always wrapped at or before the specified width. So if you have
13063
     *                                     a word that is larger than the given width, it is broken apart.
13064
     *                                     </p>
13065
     * @param bool        $add_final_break [optional] <p>
13066
     *                                     If this flag is true, then the method will add a $break at the end
13067
     *                                     of the result string.
13068
     *                                     </p>
13069
     * @param string|null $delimiter       [optional] <p>
13070
     *                                     You can change the default behavior, where we split the string by newline.
13071
     *                                     </p>
13072
     *
13073
     * @psalm-pure
13074
     *
13075
     * @return string
13076
     */
13077 1
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // without an explicit delimiter the input is split on any newline style
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        // wrap every line on its own; a failed split leaves the list empty
        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        // lines are glued back with the delimiter ("\n" when none was given)
        $result = \implode($delimiter ?? "\n", $wrapped_lines);

        return $add_final_break ? $result . $break : $result . '';
    }
13106
13107
    /**
13108
     * Returns an array of Unicode White Space characters.
13109
     *
13110
     * @psalm-pure
13111
     *
13112
     * @return string[]
13113
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
13114
     */
13115 2
    public static function ws(): array
    {
        // expose the class-level "code point => white space character" table
        $whitespace_table = self::$WHITESPACE;

        return $whitespace_table;
    }
13119
13120
    /**
13121
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
13122
     *
13123
     * EXAMPLE: <code>
13124
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
13125
     * //
13126
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
13127
     * </code>
13128
     *
13129
     * @see          http://hsivonen.iki.fi/php-utf8/
13130
     *
13131
     * @param string $str    <p>The string to be checked.</p>
13132
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
13133
     *
13134
     * @psalm-pure
13135
     *
13136
     * @return bool
13137
     *
13138
     * @noinspection ReturnTypeCanBeDeclaredInspection
13139
     */
13140 110
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // Strict mode: input for which self::is_binary() reports true is rejected
        // as soon as the UTF-16 or the UTF-32 detector does not return false.
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched with the /u
            // modifier, the string is valid UTF-8: for invalid UTF-8 nothing
            // at all matches, even if some sequences in it are valid.
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback: validate octet by octet.
        $pending = 0;         // continuation octets still expected for the current sequence
        $code_point = 0;      // code point assembled so far
        $sequence_length = 1; // number of octets announced by the current lead octet

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $byte_count = \strlen($str);
        for ($offset = 0; $offset < $byte_count; ++$offset) {
            $octet = self::$ORD[$str[$offset]];

            if ($pending !== 0) {
                // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal.
                if ((0xC0 & $octet) !== 0x80) {
                    // Incomplete multi-octet sequence.
                    return false;
                }

                $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);
                --$pending;

                if ($pending !== 0) {
                    continue;
                }

                // End of the sequence: $code_point now holds the decoded value,
                // check for illegal sequences and code points.
                if (
                    // From Unicode 3.1, non-shortest form is illegal.
                    ($sequence_length === 2 && $code_point < 0x0080)
                    ||
                    ($sequence_length === 3 && $code_point < 0x0800)
                    ||
                    ($sequence_length === 4 && $code_point < 0x10000)
                    ||
                    // 5 and 6 octet sequences are never legal.
                    ($sequence_length > 4)
                    ||
                    // From Unicode 3.2, surrogate characters are illegal.
                    (($code_point & 0xFFFFF800) === 0xD800)
                    ||
                    // Code points outside the Unicode range are illegal.
                    ($code_point > 0x10FFFF)
                ) {
                    return false;
                }

                // reset the decoder for the next character
                $code_point = 0;
                $sequence_length = 1;

                continue;
            }

            // No sequence in progress: expect US-ASCII or the lead octet of a sequence.
            if ((0x80 & $octet) === 0) {
                // US-ASCII, pass straight through.
                $sequence_length = 1;
            } elseif ((0xE0 & $octet) === 0xC0) {
                // Lead octet of a 2 octet sequence.
                $code_point = ($octet & 0x1F) << 6;
                $pending = 1;
                $sequence_length = 2;
            } elseif ((0xF0 & $octet) === 0xE0) {
                // Lead octet of a 3 octet sequence.
                $code_point = ($octet & 0x0F) << 12;
                $pending = 2;
                $sequence_length = 3;
            } elseif ((0xF8 & $octet) === 0xF0) {
                // Lead octet of a 4 octet sequence.
                $code_point = ($octet & 0x07) << 18;
                $pending = 3;
                $sequence_length = 4;
            } elseif ((0xFC & $octet) === 0xF8) {
                // Lead octet of a 5 octet sequence. This is illegal, but instead of
                // trying to resynchronize the sequence is consumed and rejected by
                // the "$sequence_length > 4" check once it is complete.
                $code_point = ($octet & 0x03) << 24;
                $pending = 4;
                $sequence_length = 5;
            } elseif ((0xFE & $octet) === 0xFC) {
                // Lead octet of a 6 octet sequence, handled like the 5 octet case.
                $code_point = ($octet & 1) << 30;
                $pending = 5;
                $sequence_length = 6;
            } else {
                // Neither US-ASCII nor a legal lead octet of a multi-octet sequence.
                return false;
            }
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
13274
13275
    /**
13276
     * @param string $str
13277
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
13278
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
13279
     *
13280
     * @psalm-pure
13281
     *
13282
     * @return string
13283
     *
13284
     * @noinspection ReturnTypeCanBeDeclaredInspection
13285
     */
13286 33
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // common case folding: map one table onto the other, direction depends on the target case
        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * Lazily loaded full case folding table (kept for the lifetime of the process).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // full case folding: entry 0 and entry 1 of the table are the two sides of the mapping
        return $use_lowercase
            ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
            : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
13328
13329
    /**
13330
     * get data from "/data/*.php"
13331
     *
13332
     * @param string $file
13333
     *
13334
     * @psalm-pure
13335
     *
13336
     * @return array
13337
     *
13338
     * @noinspection ReturnTypeCanBeDeclaredInspection
13339
     */
13340 7
    private static function getData(string $file)
    {
        // data files live next to this class in "data/<name>.php" and return their payload
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
13347
13348
    /**
13349
     * @psalm-pure
13350
     *
13351
     * @return true|null
13352
     *
13353
     * @noinspection ReturnTypeCanBeDeclaredInspection
13354
     */
13355 1
    private static function initEmojiData()
    {
        // the caches are built only once; later calls are a no-op
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * Order the table by key length (in bytes), longest key first.
         *
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder per key, built from the key's CRC32 checksum and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13385
13386
    /**
13387
     * Checks whether mbstring "overloaded" is active on the server.
13388
     *
13389
     * @psalm-pure
13390
     *
13391
     * @return bool
13392
     */
13393
    private static function mbstring_overloaded(): bool
    {
        // without the constant there is nothing that could be overloaded
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        // active when the "string functions" bit of the INI bit mask is set
        return (bool) ($func_overload & \MB_OVERLOAD_STRING);
    }
13406
13407
    /**
13408
     * @param array    $strings
13409
     * @param bool     $remove_empty_values
13410
     * @param int|null $remove_short_values
13411
     *
13412
     * @psalm-pure
13413
     *
13414
     * @return array
13415
     *
13416
     * @noinspection ReturnTypeCanBeDeclaredInspection
13417
     */
13418 2
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // Iterate by value: nothing is written back into $strings, so a reference
        // would only leave a dangling alias behind and force a copy of the array.
        foreach ($strings as $str) {
            // drop values whose character length is not greater than the threshold
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            // drop values that are empty once surrounding whitespace is trimmed
            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            // kept values are re-indexed from 0
            $return[] = $str;
        }

        return $return;
    }
13448
13449
    /**
13450
     * rxClass
13451
     *
13452
     * @param string $s
13453
     * @param string $class
13454
     *
13455
     * @return string
13456
     *
13457
     * @psalm-pure
13458
     */
13459 36
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * Results are memoized per "$s" / "$class" combination.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // Entry 0 collects the content of the character class "[...]",
        // further entries are alternatives that cannot live inside a class.
        $class_array = [$class];

        // Iterate by value with a dedicated variable: the former by-reference loop
        // over a temporary array also overwrote the "$s" parameter.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes to the front of the class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape for use inside a "/"-delimited pattern
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes is appended as is
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // NOTE: this is a truthiness check, so the string '0' is left without brackets as well
        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13504
13505
    /**
13506
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
13507
     *
13508
     * @param string $names
13509
     * @param string $delimiter
13510
     * @param string $encoding
13511
     *
13512
     * @psalm-pure
13513
     *
13514
     * @return string
13515
     *
13516
     * @noinspection ReturnTypeCanBeDeclaredInspection
13517
     */
13518 1
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split by an empty separator (it returns false before PHP 8
        // and throws a ValueError since PHP 8), so that case is answered up front.
        if ($delimiter === '') {
            return '';
        }

        // init
        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched: always the prefixes, and for the
        // "-" delimiter additionally every special name (used as a prefix).
        $untouched_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $untouched_beginnings = \array_merge($special_cases['names'], $untouched_beginnings);
        }

        foreach ($name_helper_array as $index => $name) {
            // a part that is exactly one of the special names stays as typed
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $keep_as_typed = false;
            foreach ($untouched_beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    $keep_as_typed = true;

                    break;
                }
            }

            if ($keep_as_typed) {
                continue;
            }

            // write back by index instead of by reference, so no alias outlives the loop
            $name_helper_array[$index] = self::ucfirst($name, $encoding);
        }

        return \implode($delimiter, $name_helper_array);
    }
13605
13606
    /**
13607
     * Generic case-sensitive transformation for collation matching.
13608
     *
13609
     * @param string $str <p>The input string</p>
13610
     *
13611
     * @psalm-pure
13612
     *
13613
     * @return string|null
13614
     *
13615
     * @noinspection ReturnTypeCanBeDeclaredInspection
13616
     */
13617 6
    private static function strtonatfold(string $str)
    {
        // canonical decomposition (NFD) first ...
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // ... an input that cannot be normalized folds to an empty string,
        // otherwise every run of non-spacing marks (\p{Mn}) is removed
        return $decomposed === false
            ? ''
            : \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
13630
13631
    /**
13632
     * @param int|string $input
13633
     *
13634
     * @psalm-pure
13635
     *
13636
     * @return string
13637
     *
13638
     * @noinspection ReturnTypeCanBeDeclaredInspection
13639
     */
13640 30
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // lazy-load the lookup tables on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Build a two-byte sequence from the input byte. The table offset must be
            // an integer: a fractional float offset is implicitly truncated, which is
            // deprecated since PHP 8.1, so the truncation is made explicit here.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // bitwise operators on strings are intended here (byte-wise AND / OR)
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
13669
13670
    /**
13671
     * @param string $str
13672
     *
13673
     * @psalm-pure
13674
     *
13675
     * @return string
13676
     *
13677
     * @noinspection ReturnTypeCanBeDeclaredInspection
13678
     */
13679 9
    private static function urldecode_unicode_helper(string $str)
    {
        // "%u" followed by 3 or 4 hex digits
        $escape_pattern = '/%u([0-9a-fA-F]{3,4})/';

        // cheap substring test first, the regular expression only runs when "%u" is present
        $has_marker = \strpos($str, '%u') !== false;

        if ($has_marker && \preg_match($escape_pattern, $str)) {
            // rewrite every escape as a hexadecimal HTML entity ("&#x....;")
            return (string) \preg_replace($escape_pattern, '&#x\\1;', $str);
        }

        return $str;
    }
13692
}
13693