Passed
Push — master ( 14681c...4faedd )
by Lars
05:27
created

UTF8::chr_to_decimal()   B

Complexity

Conditions 8
Paths 19

Size

Total Lines 38
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 36.3056

Importance

Changes 4
Bugs 2 Features 0
Metric Value
cc 8
eloc 20
c 4
b 2
f 0
nc 19
nop 1
dl 0
loc 38
ccs 5
cts 21
cp 0.2381
crap 36.3056
rs 8.4444
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    /**
10
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
11
     * This regular expression is a work around for http://bugs.exim.org/1279
12
     */
13
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
14
15
    /**
     * Byte order mark => length of that mark in bytes.
     *
     * Every key is written with escape sequences so that no editor, transport
     * or charset conversion can alter or strip the raw bytes. The entries
     * commented as "WINDOWS-1252" are the UTF-8 encoding of the characters a
     * BOM turns into when its bytes are misread as WINDOWS-1252 and then
     * re-encoded (one such character may need more than one byte).
     *
     * The value of each entry equals the byte length of its key. Longer marks
     * are listed before the shorter marks they start with.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252"
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
34
35
    /**
36
     * Numeric code point => UTF-8 Character
37
     *
38
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
39
     *
40
     * @var array
41
     */
42
    private static $WHITESPACE = [
43
        // NUL Byte
44
        0 => "\x0",
45
        // Tab
46
        9 => "\x9",
47
        // New Line
48
        10 => "\xa",
49
        // Vertical Tab
50
        11 => "\xb",
51
        // Carriage Return
52
        13 => "\xd",
53
        // Ordinary Space
54
        32 => "\x20",
55
        // NO-BREAK SPACE
56
        160 => "\xc2\xa0",
57
        // OGHAM SPACE MARK
58
        5760 => "\xe1\x9a\x80",
59
        // MONGOLIAN VOWEL SEPARATOR
60
        6158 => "\xe1\xa0\x8e",
61
        // EN QUAD
62
        8192 => "\xe2\x80\x80",
63
        // EM QUAD
64
        8193 => "\xe2\x80\x81",
65
        // EN SPACE
66
        8194 => "\xe2\x80\x82",
67
        // EM SPACE
68
        8195 => "\xe2\x80\x83",
69
        // THREE-PER-EM SPACE
70
        8196 => "\xe2\x80\x84",
71
        // FOUR-PER-EM SPACE
72
        8197 => "\xe2\x80\x85",
73
        // SIX-PER-EM SPACE
74
        8198 => "\xe2\x80\x86",
75
        // FIGURE SPACE
76
        8199 => "\xe2\x80\x87",
77
        // PUNCTUATION SPACE
78
        8200 => "\xe2\x80\x88",
79
        // THIN SPACE
80
        8201 => "\xe2\x80\x89",
81
        //HAIR SPACE
82
        8202 => "\xe2\x80\x8a",
83
        // LINE SEPARATOR
84
        8232 => "\xe2\x80\xa8",
85
        // PARAGRAPH SEPARATOR
86
        8233 => "\xe2\x80\xa9",
87
        // NARROW NO-BREAK SPACE
88
        8239 => "\xe2\x80\xaf",
89
        // MEDIUM MATHEMATICAL SPACE
90
        8287 => "\xe2\x81\x9f",
91
        // HALFWIDTH HANGUL FILLER
92
        65440 => "\xef\xbe\xa0",
93
        // IDEOGRAPHIC SPACE
94
        12288 => "\xe3\x80\x80",
95
    ];
96
97
    /**
98
     * @var array
99
     */
100
    private static $WHITESPACE_TABLE = [
101
        'SPACE'                     => "\x20",
102
        'NO-BREAK SPACE'            => "\xc2\xa0",
103
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
104
        'EN QUAD'                   => "\xe2\x80\x80",
105
        'EM QUAD'                   => "\xe2\x80\x81",
106
        'EN SPACE'                  => "\xe2\x80\x82",
107
        'EM SPACE'                  => "\xe2\x80\x83",
108
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
109
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
110
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
111
        'FIGURE SPACE'              => "\xe2\x80\x87",
112
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
113
        'THIN SPACE'                => "\xe2\x80\x89",
114
        'HAIR SPACE'                => "\xe2\x80\x8a",
115
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
116
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
117
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
118
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
119
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
120
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
121
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
122
    ];
123
124
    /**
125
     * @var array{upper: string[], lower: string[]}
126
     */
127
    private static $COMMON_CASE_FOLD = [
128
        'upper' => [
129
            'µ',
130
            'ſ',
131
            "\xCD\x85",
132
            'ς',
133
            'ẞ',
134
            "\xCF\x90",
135
            "\xCF\x91",
136
            "\xCF\x95",
137
            "\xCF\x96",
138
            "\xCF\xB0",
139
            "\xCF\xB1",
140
            "\xCF\xB5",
141
            "\xE1\xBA\x9B",
142
            "\xE1\xBE\xBE",
143
        ],
144
        'lower' => [
145
            'μ',
146
            's',
147
            'ι',
148
            'σ',
149
            'ß',
150
            'β',
151
            'θ',
152
            'φ',
153
            'π',
154
            'κ',
155
            'ρ',
156
            'ε',
157
            "\xE1\xB9\xA1",
158
            'ι',
159
        ],
160
    ];
161
162
    /**
163
     * @var array
164
     */
165
    private static $SUPPORT = [];
166
167
    /**
168
     * @var array|null
169
     */
170
    private static $BROKEN_UTF8_FIX;
171
172
    /**
173
     * @var array|null
174
     */
175
    private static $WIN1252_TO_UTF8;
176
177
    /**
178
     * @var array|null
179
     */
180
    private static $INTL_TRANSLITERATOR_LIST;
181
182
    /**
183
     * @var array|null
184
     */
185
    private static $ENCODINGS;
186
187
    /**
188
     * @var array|null
189
     */
190
    private static $ORD;
191
192
    /**
193
     * @var array|null
194
     */
195
    private static $EMOJI;
196
197
    /**
198
     * @var array|null
199
     */
200
    private static $EMOJI_VALUES_CACHE;
201
202
    /**
203
     * @var array|null
204
     */
205
    private static $EMOJI_KEYS_CACHE;
206
207
    /**
208
     * @var array|null
209
     */
210
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
211
212
    /**
213
     * @var array|null
214
     */
215
    private static $CHR;
216
217
    /**
     * Intentionally empty constructor.
     *
     * The constructor performs no work and takes no arguments; creating an
     * instance has no side effects.
     */
    public function __construct()
    {
        // nothing to initialize
    }
223
224
    /**
     * Fetch the single character found at a given position, similar to $str[1]
     * but counted in characters instead of bytes.
     *
     * For "UTF-8" the lookup goes straight to mb_substr() (using the mbstring
     * internal encoding); every other encoding is delegated to UTF8::substr().
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>Zero-based position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the character at $pos, or an empty string if $str is empty
     *                or $pos is negative
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be read from an empty string or from a negative position.
        if ($pos < 0 || $str === '') {
            return '';
        }

        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
245
246
    /**
     * Make sure a string starts with the UTF-8 byte order mark.
     *
     * INFO: A string for which UTF8::string_has_bom() does not report false
     * is returned untouched.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input, prefixed with UTF8::bom() when no BOM was detected
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            // a BOM was detected, nothing to add
            return $str;
        }

        return self::bom() . $str;
    }
263
264
    /**
     * Changes the case of all keys in an array.
     *
     * Each key is cast to string and converted with UTF8::strtolower() or
     * UTF8::strtoupper(); the values are copied over unchanged. When two keys
     * become equal after conversion, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default). Any other
     *                         value is treated as <strong>CASE_LOWER</strong>.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array
     *               <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Everything that is not CASE_UPPER (including unknown flags) lower-cases.
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: nothing is written back into $array, so a reference
        // would only leave a dangling alias behind after the loop.
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
299
300
    /**
     * Extract the text enclosed by two delimiters.
     *
     * The first occurrence of $start at or after $offset is located, then the
     * first occurrence of $end after that delimiter. An empty string is
     * returned when either delimiter is missing or when nothing lies between
     * them.
     *
     * For "UTF-8" the native "mb_" functions are used directly; any other
     * encoding is normalized first and handled by the UTF8 string helpers.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $from = self::strpos($str, $start, $offset, $encoding);
            if ($from === false) {
                return '';
            }

            // first character after the start delimiter
            $content_begin = $from + (int) self::strlen($start, $encoding);
            $to = self::strpos($str, $end, $content_begin, $encoding);
            if ($to === false || $to === $content_begin) {
                return '';
            }

            return (string) self::substr($str, $content_begin, $to - $content_begin, $encoding);
        }

        $from = \mb_strpos($str, $start, $offset);
        if ($from === false) {
            return '';
        }

        // first character after the start delimiter
        $content_begin = $from + (int) \mb_strlen($start);
        $to = \mb_strpos($str, $end, $content_begin);
        if ($to === false || $to === $content_begin) {
            return '';
        }

        return (string) \mb_substr($str, $content_begin, $to - $content_begin);
    }
363
364
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are read as one big-endian number: insignificant leading
     * zeros are dropped, the remainder is left-padded to a whole number of
     * bytes and every group of eight bits becomes one output byte. Characters
     * other than "0" and "1" are ignored.
     *
     * The conversion works byte by byte instead of through base_convert(),
     * because base_convert() loses precision on large numbers, and an
     * odd-length hex string handed to pack('H*') is padded on the wrong side.
     *
     * @param mixed $bin 1|0
     *
     * @return string the decoded bytes, or an empty string when the input is
     *                empty or contains no "1" digit
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // Keep only binary digits and strip the leading zeros that carry no value.
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad so that the most significant byte is complete.
        $padded = \str_pad($bits, \intdiv(\strlen($bits) + 7, 8) * 8, '0', \STR_PAD_LEFT);

        $bytes = '';
        foreach (\str_split($padded, 8) as $octet) {
            $bytes .= \chr((int) \bindec($octet));
        }

        return $bytes;
    }
384
385
    /**
     * Provide the byte order mark used for UTF-8.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string the three bytes EF BB BF
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
396
397
    /**
     * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
411
412
    /**
     * Pick the character located at $index (zero-based).
     *
     * The index is passed through as the start argument of the substring call,
     * so it is interpreted exactly as mb_substr() (for "UTF-8") or
     * UTF8::substr() (for every other encoding) interprets it.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
429
430
    /**
     * Break a string up into its characters.
     *
     * Thin wrapper: the work is done by UTF8::str_split() with its default
     * arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
441
442
    /**
     * Detect once which UTF-8 related extensions are available and record the
     * findings in self::$SUPPORT.
     *
     * When mbstring is loaded (or the symfony polyfill is in use) the mbstring
     * internal encoding is switched to "UTF-8" as a side effect.
     *
     * @return true|null true when the detection ran during this call,
     *                   null when it had already been done before
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // The detection only ever runs once.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // The polyfill provides "mb_" functions too, so configure it the same way.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
497
498
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * The code point is normalized to an integer before any lookup, so numeric
     * strings behave the same on every code path (IntlChar::chr() would
     * otherwise treat a string argument as a character, not as a number).
     * Values that are not numeric, negative or above U+10FFFF yield null.
     *
     * Results are memoized per code point and encoding for the lifetime of the
     * process.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Normalize the code point once, so that every branch below works on an int.
        if (!\is_int($code_point)) {
            if (!\is_numeric($code_point)) {
                return null;
            }

            $code_point = (int) $code_point;
        }

        // Outside of the Unicode code space there is no character to build.
        if ($code_point < 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        // NOTE(review): self::$CHR is presumably a "byte value => one-byte string"
        // table loaded via getData('chr'); its contents are not visible here.
        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // Code points up to 0x7F were already handled above, so only the
        // multi-byte forms remain.
        if ($code_point <= 0x7FF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
610
611
    /**
     * Run a callback on every character of a string.
     *
     * The string is split with UTF8::str_split() and the pieces are handed to
     * array_map() together with the callback.
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
627
628
    /**
     * List the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $characters = self::str_split($str);

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \array_map('\strlen', $characters);
        }

        // With function overloading active the plain strlen() cannot be relied
        // on for byte counts, so measure with an 8-bit charset instead.
        return \array_map(
            static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            },
            $characters
        );
    }
658
659
    /**
     * Get a decimal code representation of a specific character.
     *
     * With iconv available the character is converted to UCS-4LE and read as a
     * 32-bit little-endian integer. Otherwise (or when iconv fails) the UTF-8
     * lead byte is decoded by hand and the continuation bytes that are present
     * are folded in.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the code point, or 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        // There is no byte to decode; unpack() and $char[0] would both fail.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            // unpack('V') needs four bytes, anything shorter is not a usable result
            if (\is_string($chr_tmp) && \strlen($chr_tmp) >= 4) {
                $unpacked = \unpack('V', $chr_tmp);
                if ($unpacked !== false) {
                    /** @noinspection OffsetOperationsInspection */
                    return $unpacked[1];
                }
            }
        }

        $code = self::ord($char[0]);

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        $bytes = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // A truncated sequence must not be read past the end of the input.
        $available = \min($bytes, \strlen($char));
        for ($i = 1; $i < $available; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i]) & ~0x80);
        }

        return $code;
    }
705
706
    /**
     * Render the code point of a UTF-8 encoded character in hexadecimal
     * notation (U+xxxx by default).
     *
     * The literal "&#0;" is treated as the empty character before the
     * code point is looked up via UTF8::ord().
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional]
     *
     * @return string The code point encoded as U+xxxx, or an empty string when
     *                $char is an empty string
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $input = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($input), $prefix);
    }
726
727
    /**
     * Deprecated alias: forwards its argument to UTF8::chr_to_decimal().
     *
     * @param string $chr
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
741
742
    /**
     * Cut a string into chunks and glue them back together with a separator.
     *
     * The chunks come from UTF8::str_split(); $end is placed between the
     * chunks only, so the result does not finish with a trailing $end.
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
755
756
    /**
     * Strip every byte that is not part of a well-formed UTF-8 sequence and
     * optionally apply further normalization steps.
     *
     * The optional steps run in a fixed order: diamond question mark,
     * invisible characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
     *                                              invisible characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of valid sequences; stray bytes land in group
        // 2 or 3, so replacing each match with '$1' drops them.
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
824
825
    /**
     * Repair simple encoding mistakes in a string and then sanitize it.
     *
     * After UTF8::fix_simple_utf8() the string goes through UTF8::clean() with
     * BOM removal, whitespace normalization (keeping non-breaking spaces),
     * diamond question mark removal and invisible character removal switched
     * on; MS Word normalization stays off.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // (no MS Word normalization)
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
859
860
    /**
     * Turn a string, or an array of strings, into the matching Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is split into characters first; an array is used as given and
     * every element is passed to UTF8::ord(). Anything else, as well as an
     * empty list, produces an empty array.
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $characters = \is_string($arg) === true ? self::str_split($arg) : $arg;

        if (!\is_array($characters) || $characters === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $characters);

        if ($u_style !== true) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
908
909
    /**
     * Collapses every run of whitespace ("[[:space:]]+") into a single space
     * and trims the result.
     *
     * The replacement is done by mb_ereg_replace() when mbstring support is
     * flagged in self::$SUPPORT, otherwise by self::regex_replace().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The trimmed string with condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = self::$SUPPORT['mbstring'] === true
            ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
            : self::regex_replace($str, '[[:space:]]+', ' ');

        return \trim($collapsed);
    }
927
928
    /**
     * Counts how often each character occurs in a string.
     *
     * The string is split into single characters by self::str_split() and the
     * pieces are tallied with array_count_values().
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Forwarded to self::str_split(); upstream documents it
     *                                        as "remove non UTF-8 chars from the string".</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Forwarded unchanged to self::str_split()
     *                                        (presumably toggles the mbstring based splitting - TODO confirm).</p>
     *
     * @return int[]
     *               <p>An associative array with the characters as keys and their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
952
953
    /**
     * Removes css media-queries ("@media ... { ... }" blocks) from a string.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string
     *                <p>The input without the matched media-query blocks; an empty string
     *                if preg_replace() fails (its null result is cast to string).</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
968
969
    /**
     * Tells whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <p><strong>true</strong> if extension_loaded('ctype') reports the extension,
     *              <strong>false</strong> otherwise</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
979
980
    /**
     * Converts a decimal code point into a UTF-8 character.
     *
     * The value is wrapped into a numeric html entity ("&#<value>;") which is
     * then decoded by self::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point; it is converted to string when the entity is built.</p>
     *
     * @return string
     *                <p>The result of decoding the numeric entity.</p>
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = "&#{$int};";

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
991
992
    /**
     * Decodes a MIME header field.
     *
     * With iconv support the work is delegated to iconv_mime_decode() (which
     * continues on errors); otherwise the string is first re-encoded to the
     * requested charset (unless that is UTF-8) and decoded by mb_decode_mimeheader().
     *
     * @param string $str      <p>The encoded header.</p>
     * @param string $encoding [optional] <p>The target charset, it is normalized via
     *                         self::normalize_encoding() unless it is 'UTF-8' or 'CP850'.</p>
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // fallback without iconv
        $header = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($header);
    }
1018
1019
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * The placeholder keys from the emoji cache are replaced by the cached
     * emoji values via str_replace().
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, the reversible key set
     *                                               (self::$EMOJI_KEYS_REVERSIBLE_CACHE) is searched
     *                                               instead of self::$EMOJI_KEYS_CACHE; use the same
     *                                               setting as for "emoji_encode".</p>
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        $placeholders = $use_reversible_string_mappings === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1049
1050
    /**
     * Encodes a string with emoji chars into a non-emoji string.
     *
     * The cached emoji values are replaced by their placeholder keys via
     * str_replace().
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, the reversible key set
     *                                               (self::$EMOJI_KEYS_REVERSIBLE_CACHE) is used as
     *                                               replacement instead of self::$EMOJI_KEYS_CACHE, so
     *                                               that "emoji_decode" can map the result back.</p>
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        $placeholders = $use_reversible_string_mappings === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $placeholders,
            $str
        );
    }
1080
1081
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES'
     * are handled, both as target and as source.
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true (or when no $from_encoding is given)
     *                                              the source encoding is detected via
     *                                              self::str_detect_encoding() and a detected value overrides
     *                                              $from_encoding; if nothing can be detected in this mode the
     *                                              string is passed through self::to_utf8().</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the string can not be used for json_encode(), or if
     *                                   $from_encoding is 'BASE64' and the string is not valid base64
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding, null);
        }

        // nothing to do if source and target are explicitly the same
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            // NOTE(review): json_decode() may yield a non-string value for non-string JSON - confirm the intended handling
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // strict decoding returns false for invalid input; fail explicitly
            // instead of leaking "false" into the string-only code below
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for base64_decode().');
            }

            $str = $decoded;
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT, 'UTF-8');
            $from_encoding = '';
        }

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding === true
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        if ($from_encoding_auto_detected !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            if ($str_encoded) {
                return $str_encoded;
            }
        }

        // last resort: iconv, and the unchanged input if that fails too
        $return = \iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        return $str;
    }
1240
1241
    /**
1242
     * @param string $str
1243
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1244
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1245
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1246
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1247
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1248
     *
1249
     * @return false|string
1250
     *                      <p>An encoded MIME field on success,
1251
     *                      or false if an error occurs during the encoding.</p>
1252
     */
1253
    public static function encode_mimeheader(
1254
        $str,
1255
        $from_charset = 'UTF-8',
1256
        $to_charset = 'UTF-8',
1257
        $transfer_encoding = 'Q',
1258
        $linefeed = '\\r\\n',
1259
        $indent = 76
1260
    ) {
1261
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
1262
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
1263
        }
1264
1265
        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
1266
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
1267
        }
1268
1269
        return \iconv_mime_encode(
1270
            '',
1271
            $str,
1272
            [
1273
                'scheme'           => $transfer_encoding,
1274
                'line-length'      => $indent,
1275
                'input-charset'    => $from_charset,
1276
                'output-charset'   => $to_charset,
1277
                'line-break-chars' => $linefeed,
1278
            ]
1279
        );
1280
    }
1281
1282
    /**
     * Creates an extract of a text; if the search-string is found, the extract is
     * cut around it, otherwise the beginning of the text is used.
     *
     * Cuts are made at the next space or dot behind the calculated position and
     * skipped text is marked with $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string (matched case-insensitively).</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 uses the native "mb_" functions directly, everything else the wrappers of this class
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        //
        // no search-string: cut the beginning of the text
        //

        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $string_length);
            }

            $pos = $is_utf8
                ? (int) \min(\mb_strpos($str, ' ', $end), \mb_strpos($str, '.', $end))
                : (int) \min(self::strpos($str, ' ', $end, $encoding), self::strpos($str, '.', $end, $encoding));

            if (!$pos) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos)
                : self::substr($str, 0, $pos, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        //
        // with search-string: try to center the match
        //

        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $search_length = (int) \mb_strlen($search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $search_length = (int) self::strlen($search, $encoding);
        }
        $half_side = (int) ($word_position - $length / 2 + $search_length / 2);

        // start at the last space / dot in front of the centered window
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $is_utf8
                ? \mb_substr($str, 0, $half_side)
                : self::substr($str, 0, $half_side, $encoding);

            if ($half_text !== false) {
                $pos_start = $is_utf8
                    ? (int) \max(\mb_strrpos($half_text, ' '), \mb_strrpos($half_text, '.'))
                    : (int) \max(self::strrpos($half_text, ' ', 0, $encoding), self::strrpos($half_text, '.', 0, $encoding));
            }
        }

        $text_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            $offset = \min($pos_start + $length - 1, $text_length);

            $pos_end = (
                $is_utf8
                    ? (int) \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
                    : (int) \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding))
            ) - $pos_start;

            if ($pos_end <= 0) {
                // no cut position behind the window: take everything up to the end
                $str_sub = $is_utf8
                    ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                    : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

                if ($str_sub === false) {
                    return '';
                }

                return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, $pos_start, $pos_end)
                : self::substr($str, $pos_start, $pos_end, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // the match is missing or too close to the beginning: cut from the start of the text
        $offset = \min($length - 1, $text_length);

        $pos_end = $is_utf8
            ? (int) \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
            : (int) \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding));

        if (!$pos_end) {
            return $str;
        }

        $str_sub = $is_utf8
            ? \mb_substr($str, 0, $pos_end)
            : self::substr($str, 0, $pos_end, $encoding);

        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }
1469
1470
    /**
     * Reads entire file into a string and optionally converts it to UTF-8.
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read; it is passed through
     *                                        filter_var(..., FILTER_SANITIZE_STRING) first.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to the native file_get_contents() to trigger
     *                                        the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If it is null and $timeout is set,
     *                                        a context with the "http" timeout option is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, null means 0.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and is neither UTF-16 nor UTF-32) will not be
     *                                        converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the supported PHP versions
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // only pass the length to the native function if it was really given
        $data = \is_int($max_length)
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $max_length)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        $is_convertible = self::is_binary($data, true) !== true
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::encode('UTF-8', $data, false, $from_encoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1566
1567
    /**
     * Checks if the content of a file passes self::string_has_bom(),
     * i.e. if the file starts with a BOM (Byte Order Mark).
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1586
1587
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively, element by element; strings
     * are normalized; every other type is returned unchanged.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>The \Normalizer form, NFC by default.</p>
     * @param string $leading_combining  <p>Prefix that is prepended when the resulting string
     *                                   starts with a combining mark (\p{Mn}).</p>
     *
     * @return mixed
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        // containers: filter every element in place
        if (\is_array($var) || \is_object($var)) {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if (!\is_string($var)) {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (ASCII::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // marker value: it only has to satisfy the isset() check below
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // an unusable normalizer result means: re-encode the input instead
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $starts_with_combining_char = $var[0] >= "\x80"
                                      &&
                                      isset($normalized[0], $leading_combining[0])
                                      &&
                                      \preg_match('/^\\p{Mn}/u', $var);

        if ($starts_with_combining_char) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1656
1657
    /**
     * "filter_input()"-wrapper which passes the result through self::filter()
     * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
     *
     * Gets a specific external variable by name and optionally filters it
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              It is only forwarded when it was explicitly passed as 4th argument.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        $value = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($value);
    }
1699
1700
    /**
     * Wrapper around "filter_input_array()" whose result is handed to "UTF8::filter()"
     * (normalization to UTF-8 NFC, converting from WINDOWS-1252 when needed).
     *
     * Fetches several external variables at once and optionally filters them.
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string containing a
     *                          variable name; a valid value is either a filter type or an array that
     *                          optionally specifies the filter, flags and options. In the array form the
     *                          valid keys are "filter" (the filter type), "flags" (flags that apply to the
     *                          filter) and "options" (options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all
     *                          values in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // With a single argument the native defaults are used; otherwise both
        // "$definition" and "$add_empty" are forwarded.
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1748
1749
    /**
     * Wrapper around "filter_var()" whose result is handed to "UTF8::filter()"
     * (normalization to UTF-8 NFC, converting from WINDOWS-1252 when needed).
     *
     * Filters a single value with the specified filter.
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        ID of the filter to apply; the manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or a bitwise disjunction of flags. If the filter
     *                        accepts options, flags can be provided in the "flags" field of the array. For
     *                        the "callback" filter a callable should be passed; it must accept one argument
     *                        (the value to be filtered) and return the value after filtering/sanitizing it.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // filters that accept options use this format
     *                        $options = array(
     *                            'options' => array(
     *                                'default'   => 3, // value to return if the filter fails
     *                                'min_range' => 0, // other options go here
     *                            ),
     *                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *
     *                        // filters that only accept flags can receive them directly ...
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        // ... or wrapped in an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                            array('flags' => FILTER_NULL_ON_FAILURE));
     *
     *                        // callback validate filter, expected format: "Surname, GivenNames"
     *                        function foo($value)
     *                        {
     *                            if (strpos($value, ", ") === false) return false;
     *                            list($surname, $givennames) = explode(", ", $value, 2);
     *                            $empty = (empty($surname) || empty($givennames));
     *                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                            if ($empty || $notstrings) {
     *                                return false;
     *                            } else {
     *                                return $value;
     *                            }
     *                        }
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward "$options" only when the caller really passed a 3rd argument.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1820
1821
    /**
     * Wrapper around "filter_var_array()" whose result is handed to "UTF8::filter()"
     * (normalization to UTF-8 NFC, converting from WINDOWS-1252 when needed).
     *
     * Filters several values at once.
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string containing a
     *                          variable name; a valid value is either a filter type or an array that
     *                          optionally specifies the filter, flags and options. In the array form the
     *                          valid keys are "filter" (the filter type), "flags" (flags that apply to the
     *                          filter) and "options" (options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all
     *                          values in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // With a single argument the native defaults are used; otherwise both
        // "$definition" and "$add_empty" are forwarded.
        $values = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($values);
    }
1867
1868
    /**
     * Tells whether the "finfo" class exists in the running PHP installation.
     *
     * @return bool
     *              <strong>true</strong> if the class is available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $has_finfo = \class_exists('finfo');

        return $has_finfo;
    }
1878
1879
    /**
     * Extracts the leading $n characters of a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                the leading characters, or an empty string for empty input or $n <= 0
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract from an empty string or for a non-positive count
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, 0, $n, $encoding);
        }

        // fast path: the default encoding goes straight to mbstring
        return (string) \mb_substr($str, 0, $n);
    }
1903
1904
    /**
     * Tells whether a string has at most $box_size characters, counted via "UTF8::strlen()".
     *
     * @param string $str      the string to measure
     * @param int    $box_size the maximum number of characters allowed
     *
     * @return bool true if the character count is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
1916
1917
    /**
     * Repairs simple cases of broken UTF-8 via a fixed search/replace table.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * Use this if you received an UTF-8 string that was converted from Windows-1252 as if it
     * were ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // The search/replace lists are derived once per request from the
        // "utf8_fix" data table and then reused by every later call.
        static $search = null;
        static $replace = null;

        if ($search === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $fix_map = self::$BROKEN_UTF8_FIX;
            $search = \array_keys($fix_map);
            $replace = \array_values($fix_map);
        }

        return \str_replace($search, $replace, $str);
    }
1950
1951
    /**
     * Repairs a string that was UTF-8 encoded twice (or more often).
     *
     * Arrays are processed element by element (recursively), keeping their keys.
     *
     * @param string|string[] $str a string or an array of strings
     *
     * @return string|string[]
     *                         the fixed input-"array" or the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $item) {
                $str[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Decode and re-encode until a pass no longer changes the string.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1993
1994
    /**
     * Get the text direction of a specific character.
     *
     * When "IntlChar" support is flagged as available, "IntlChar::charDirection()" is asked first.
     * If its answer is neither a known LTR nor a known RTL code, or if intl is not available,
     * the code point is looked up in a built-in table of right-to-left ranges.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $intl_codes = [
                'LTR' => [0, 11, 12, 20],
                'RTL' => [1, 13, 14, 15, 21],
            ];

            foreach ($intl_codes as $direction => $codes) {
                if (\in_array($intl_direction, $codes, true)) {
                    return $direction;
                }
            }
        }

        $code_point = self::chr_to_decimal($char);

        // everything outside of the table's overall bounds is left-to-right
        if ($code_point < 0x5be || $code_point > 0x10b7f) {
            return 'LTR';
        }

        // inclusive [first, last] code point ranges that are right-to-left, sorted ascending
        $rtl_ranges = [
            [0x5be, 0x5be],
            [0x5c0, 0x5c0],
            [0x5c3, 0x5c3],
            [0x5c6, 0x5c6],
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x608, 0x608],
            [0x60b, 0x60b],
            [0x60d, 0x60d],
            [0x61b, 0x61b],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x710, 0x710],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7b1, 0x7b1],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x7fa, 0x7fa],
            [0x800, 0x815],
            [0x81a, 0x81a],
            [0x824, 0x824],
            [0x828, 0x828],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0x85e, 0x85e],
            [0x200f, 0x200f],
            [0xfb1d, 0xfb1d],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb3e, 0xfb3e],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x10808, 0x10808],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083c, 0x1083c],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x1093f, 0x1093f],
            [0x10a00, 0x10a00],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($code_point < $range[0]) {
                // the table is sorted, so no later range can match either
                break;
            }

            if ($code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2107
2108
    /**
     * Report which php-features this class detected as supported.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2132
2133
    /**
     * Guess a file type from the first two bytes of its content.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      the (binary) content to inspect
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'; returned whenever
     *                         the content is shorter than two bytes or not recognized
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        /** @var false|string $magic_bytes - needed for PhpStan (stubs error) */
        $magic_bytes = \substr($str, 0, 2);
        if ($magic_bytes === false || \strlen($magic_bytes) !== 2) {
            return $fallback;
        }

        /** @var array|false $byte_values - needed for PhpStan (stubs error) */
        $byte_values = \unpack('C2chars', $magic_bytes);
        if ($byte_values === false) {
            return $fallback;
        }

        // The decimal values of both bytes are glued together as text,
        // e.g. 0xFF 0xD8 => "255" . "216" => 255216.
        /** @noinspection OffsetOperationsInspection */
        $type_code = (int) ($byte_values['chars1'] . $byte_values['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2206
2207
    /**
     * Build a string of $length characters, each picked at random from $possible_chars.
     *
     * An empty $possible_chars yields an empty string; a $length of zero or less
     * yields an empty string as well, because no character is ever picked.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The fast path is chosen by the encoding exactly as passed in,
        // i.e. before any normalization happens.
        $use_mbstring_directly = $encoding === 'UTF-8';

        if ($use_mbstring_directly) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        $result = '';
        $picked = 0;

        while ($picked < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                // fall back to the non-cryptographic generator
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mbstring_directly
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // a failed extraction does not count, the pick is simply repeated
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2271
2272
    /**
     * Build a unique identifier from a random number, the session id, the
     * REMOTE_ADDR / SERVER_ADDR server values (when set) and optional extra entropy.
     *
     * @param int|string $entropy_extra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropy_extra = '', bool $use_md5 = true): string
    {
        $seed = \implode(
            '',
            [
                \random_int(0, \mt_getrandmax()),
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $entropy_extra,
            ]
        );

        // "uniqid()" gets the seed as prefix and is asked for its additional entropy
        $identifier = \uniqid($seed, true);

        if (!$use_md5) {
            return $identifier;
        }

        return \md5($identifier . $seed);
    }
2294
2295
    /**
     * Alias of "UTF8::string_has_bom()", kept for backward compatibility.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2309
2310
    /**
     * Tells whether the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2326
2327
    /**
     * Tells whether the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2343
2344
    /**
     * Turns a hexadecimal code point value into the matching UTF-8 character.
     *
     * The value is converted with "hexdec()" and then passed to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $code_point = \hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2355
2356
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "U+" or
     * with a literal backslash followed by "u" (e.g. "U+00a7", "\u00a7", "00a7").
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are accepted: letters beyond "f" would otherwise pass
        // the pattern and be turned into 0 by "intval()" instead of reporting a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2380
2381
    /**
     * Alias of "UTF8::html_entity_decode()", kept for backward compatibility.
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string
     *
     * @see UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2400
2401
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * With mbstring support "mb_encode_numericentity()" does the work; otherwise every
     * character is encoded on its own via "UTF8::single_chr_html_encode()".
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // keeping ASCII means that only code points from 0x80 upwards are encoded
            $start_code = $keep_ascii_chars === true ? 0x80 : 0x00;

            // One convmap range is exactly [first code point, last code point, offset, mask].
            // A stray 5th element is ignored by PHP 7 but makes PHP 8 throw a ValueError
            // (the map must have a multiple of 4 elements).
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2468
2469
    /**
2470
     * UTF-8 version of html_entity_decode()
2471
     *
2472
     * The reason we are not using html_entity_decode() by itself is because
2473
     * while it is not technically correct to leave out the semicolon
2474
     * at the end of an entity most browsers will still interpret the entity
2475
     * correctly. html_entity_decode() does not convert entities without
2476
     * semicolons, so we are left with our own little solution here. Bummer.
2477
     *
2478
     * Convert all HTML entities to their applicable characters
2479
     *
2480
     * INFO: opposite to UTF8::html_encode()
2481
     *
2482
     * @see http://php.net/manual/en/function.html-entity-decode.php
2483
     *
2484
     * @param string $str      <p>
2485
     *                         The input string.
2486
     *                         </p>
2487
     * @param int    $flags    [optional] <p>
2488
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2489
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2490
     *                         <table>
2491
     *                         Available <i>flags</i> constants
2492
     *                         <tr valign="top">
2493
     *                         <td>Constant Name</td>
2494
     *                         <td>Description</td>
2495
     *                         </tr>
2496
     *                         <tr valign="top">
2497
     *                         <td><b>ENT_COMPAT</b></td>
2498
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2499
     *                         </tr>
2500
     *                         <tr valign="top">
2501
     *                         <td><b>ENT_QUOTES</b></td>
2502
     *                         <td>Will convert both double and single quotes.</td>
2503
     *                         </tr>
2504
     *                         <tr valign="top">
2505
     *                         <td><b>ENT_NOQUOTES</b></td>
2506
     *                         <td>Will leave both double and single quotes unconverted.</td>
2507
     *                         </tr>
2508
     *                         <tr valign="top">
2509
     *                         <td><b>ENT_HTML401</b></td>
2510
     *                         <td>
2511
     *                         Handle code as HTML 4.01.
2512
     *                         </td>
2513
     *                         </tr>
2514
     *                         <tr valign="top">
2515
     *                         <td><b>ENT_XML1</b></td>
2516
     *                         <td>
2517
     *                         Handle code as XML 1.
2518
     *                         </td>
2519
     *                         </tr>
2520
     *                         <tr valign="top">
2521
     *                         <td><b>ENT_XHTML</b></td>
2522
     *                         <td>
2523
     *                         Handle code as XHTML.
2524
     *                         </td>
2525
     *                         </tr>
2526
     *                         <tr valign="top">
2527
     *                         <td><b>ENT_HTML5</b></td>
2528
     *                         <td>
2529
     *                         Handle code as HTML 5.
2530
     *                         </td>
2531
     *                         </tr>
2532
     *                         </table>
2533
     *                         </p>
2534
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2535
     *
2536
     * @return string the decoded string
2537
     */
2538 46
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Fewer than four bytes cannot hold a decodable entity (examples: "&;" or "&x;").
        if (!isset($str[3])) {
            return $str;
        }

        // Without an ampersand there is nothing to decode.
        if (\strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // Any encoding other than these three triggers a warning when mbstring is missing.
        $is_exempt_encoding = $encoding === 'UTF-8'
                              || $encoding === 'ISO-8859-1'
                              || $encoding === 'WINDOWS-1252';
        if (!$is_exempt_encoding && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly until a pass no longer changes the string,
        // so that nested (multi-encoded) entities are resolved as well.
        do {
            $before_pass = $str;

            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // Append the missing trailing ";" to numeric entities (decimal and hex),
                // because the native html_entity_decode() ignores entities without it.
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($before_pass !== $str);

        return $str;
    }
2596
2597
    /**
2598
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2599
     *
2600
     * @param string $str
2601
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2602
     *
2603
     * @return string
2604
     */
2605 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles and substitute invalid code unit sequences.
        $escape_flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escape_flags, $encoding);
    }
2613
2614
    /**
2615
     * Remove empty html-tag.
2616
     *
2617
     * e.g.: <tag></tag>
2618
     *
2619
     * @param string $str
2620
     *
2621
     * @return string
2622
     */
2623 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Matches an opening tag directly followed (optional whitespace only) by a closing tag.
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on failure, e.g. when the "u" modifier meets malformed
        // UTF-8. Casting that null to string used to turn the whole input into '' - hand the
        // untouched input back instead of silently discarding it.
        return $stripped ?? $str;
    }
2631
2632
    /**
2633
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2634
     *
2635
     * @see http://php.net/manual/en/function.htmlentities.php
2636
     *
2637
     * @param string $str           <p>
2638
     *                              The input string.
2639
     *                              </p>
2640
     * @param int    $flags         [optional] <p>
2641
     *                              A bitmask of one or more of the following flags, which specify how to handle
2642
     *                              quotes, invalid code unit sequences and the used document type. The default is
2643
     *                              ENT_COMPAT | ENT_HTML401.
2644
     *                              <table>
2645
     *                              Available <i>flags</i> constants
2646
     *                              <tr valign="top">
2647
     *                              <td>Constant Name</td>
2648
     *                              <td>Description</td>
2649
     *                              </tr>
2650
     *                              <tr valign="top">
2651
     *                              <td><b>ENT_COMPAT</b></td>
2652
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2653
     *                              </tr>
2654
     *                              <tr valign="top">
2655
     *                              <td><b>ENT_QUOTES</b></td>
2656
     *                              <td>Will convert both double and single quotes.</td>
2657
     *                              </tr>
2658
     *                              <tr valign="top">
2659
     *                              <td><b>ENT_NOQUOTES</b></td>
2660
     *                              <td>Will leave both double and single quotes unconverted.</td>
2661
     *                              </tr>
2662
     *                              <tr valign="top">
2663
     *                              <td><b>ENT_IGNORE</b></td>
2664
     *                              <td>
2665
     *                              Silently discard invalid code unit sequences instead of returning
2666
     *                              an empty string. Using this flag is discouraged as it
2667
     *                              may have security implications.
2668
     *                              </td>
2669
     *                              </tr>
2670
     *                              <tr valign="top">
2671
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2672
     *                              <td>
2673
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2674
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2675
     *                              string.
2676
     *                              </td>
2677
     *                              </tr>
2678
     *                              <tr valign="top">
2679
     *                              <td><b>ENT_DISALLOWED</b></td>
2680
     *                              <td>
2681
     *                              Replace invalid code points for the given document type with a
2682
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2683
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2684
     *                              instance, to ensure the well-formedness of XML documents with
2685
     *                              embedded external content.
2686
     *                              </td>
2687
     *                              </tr>
2688
     *                              <tr valign="top">
2689
     *                              <td><b>ENT_HTML401</b></td>
2690
     *                              <td>
2691
     *                              Handle code as HTML 4.01.
2692
     *                              </td>
2693
     *                              </tr>
2694
     *                              <tr valign="top">
2695
     *                              <td><b>ENT_XML1</b></td>
2696
     *                              <td>
2697
     *                              Handle code as XML 1.
2698
     *                              </td>
2699
     *                              </tr>
2700
     *                              <tr valign="top">
2701
     *                              <td><b>ENT_XHTML</b></td>
2702
     *                              <td>
2703
     *                              Handle code as XHTML.
2704
     *                              </td>
2705
     *                              </tr>
2706
     *                              <tr valign="top">
2707
     *                              <td><b>ENT_HTML5</b></td>
2708
     *                              <td>
2709
     *                              Handle code as HTML 5.
2710
     *                              </td>
2711
     *                              </tr>
2712
     *                              </table>
2713
     *                              </p>
2714
     * @param string $encoding      [optional] <p>
2715
     *                              Like <b>htmlspecialchars</b>,
2716
     *                              <b>htmlentities</b> takes an optional third argument
2717
     *                              <i>encoding</i> which defines encoding used in
2718
     *                              conversion.
2719
     *                              Although this argument is technically optional, you are highly
2720
     *                              encouraged to specify the correct value for your code.
2721
     *                              </p>
2722
     * @param bool   $double_encode [optional] <p>
2723
     *                              When <i>double_encode</i> is turned off PHP will not
2724
     *                              encode existing html entities. The default is to convert everything.
2725
     *                              </p>
2726
     *
2727
     * @return string
2728
     *                <p>
2729
     *                The encoded string.
2730
     *                <br><br>
2731
     *                If the input <i>string</i> contains an invalid code unit
2732
     *                sequence within the given <i>encoding</i> an empty string
2733
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2734
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2735
     *                </p>
2736
     */
2737 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * The native htmlentities() leaves backslashes untouched, so every backslash
         * is replaced by its numeric HTML entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // Run the result through html_encode() with "keep ASCII chars" enabled.
        return self::html_encode($encoded, true, $encoding);
    }
2766
2767
    /**
2768
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2769
     *
2770
     * INFO: Take a look at "UTF8::htmlentities()"
2771
     *
2772
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2773
     *
2774
     * @param string $str           <p>
2775
     *                              The string being converted.
2776
     *                              </p>
2777
     * @param int    $flags         [optional] <p>
2778
     *                              A bitmask of one or more of the following flags, which specify how to handle
2779
     *                              quotes, invalid code unit sequences and the used document type. The default is
2780
     *                              ENT_COMPAT | ENT_HTML401.
2781
     *                              <table>
2782
     *                              Available <i>flags</i> constants
2783
     *                              <tr valign="top">
2784
     *                              <td>Constant Name</td>
2785
     *                              <td>Description</td>
2786
     *                              </tr>
2787
     *                              <tr valign="top">
2788
     *                              <td><b>ENT_COMPAT</b></td>
2789
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2790
     *                              </tr>
2791
     *                              <tr valign="top">
2792
     *                              <td><b>ENT_QUOTES</b></td>
2793
     *                              <td>Will convert both double and single quotes.</td>
2794
     *                              </tr>
2795
     *                              <tr valign="top">
2796
     *                              <td><b>ENT_NOQUOTES</b></td>
2797
     *                              <td>Will leave both double and single quotes unconverted.</td>
2798
     *                              </tr>
2799
     *                              <tr valign="top">
2800
     *                              <td><b>ENT_IGNORE</b></td>
2801
     *                              <td>
2802
     *                              Silently discard invalid code unit sequences instead of returning
2803
     *                              an empty string. Using this flag is discouraged as it
2804
     *                              may have security implications.
2805
     *                              </td>
2806
     *                              </tr>
2807
     *                              <tr valign="top">
2808
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2809
     *                              <td>
2810
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2811
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2812
     *                              string.
2813
     *                              </td>
2814
     *                              </tr>
2815
     *                              <tr valign="top">
2816
     *                              <td><b>ENT_DISALLOWED</b></td>
2817
     *                              <td>
2818
     *                              Replace invalid code points for the given document type with a
2819
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2820
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2821
     *                              instance, to ensure the well-formedness of XML documents with
2822
     *                              embedded external content.
2823
     *                              </td>
2824
     *                              </tr>
2825
     *                              <tr valign="top">
2826
     *                              <td><b>ENT_HTML401</b></td>
2827
     *                              <td>
2828
     *                              Handle code as HTML 4.01.
2829
     *                              </td>
2830
     *                              </tr>
2831
     *                              <tr valign="top">
2832
     *                              <td><b>ENT_XML1</b></td>
2833
     *                              <td>
2834
     *                              Handle code as XML 1.
2835
     *                              </td>
2836
     *                              </tr>
2837
     *                              <tr valign="top">
2838
     *                              <td><b>ENT_XHTML</b></td>
2839
     *                              <td>
2840
     *                              Handle code as XHTML.
2841
     *                              </td>
2842
     *                              </tr>
2843
     *                              <tr valign="top">
2844
     *                              <td><b>ENT_HTML5</b></td>
2845
     *                              <td>
2846
     *                              Handle code as HTML 5.
2847
     *                              </td>
2848
     *                              </tr>
2849
     *                              </table>
2850
     *                              </p>
2851
     * @param string $encoding      [optional] <p>
2852
     *                              Defines encoding used in conversion.
2853
     *                              </p>
2854
     *                              <p>
2855
     *                              For the purposes of this function, the encodings
2856
     *                              ISO-8859-1, ISO-8859-15,
2857
     *                              UTF-8, cp866,
2858
     *                              cp1251, cp1252, and
2859
     *                              KOI8-R are effectively equivalent, provided the
2860
     *                              <i>string</i> itself is valid for the encoding, as
2861
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2862
     *                              the same positions in all of these encodings.
2863
     *                              </p>
2864
     * @param bool   $double_encode [optional] <p>
2865
     *                              When <i>double_encode</i> is turned off PHP will not
2866
     *                              encode existing html entities, the default is to convert everything.
2867
     *                              </p>
2868
     *
2869
     * @return string the converted string.
2870
     *                </p>
2871
     *                <p>
2872
     *                If the input <i>string</i> contains an invalid code unit
2873
     *                sequence within the given <i>encoding</i> an empty string
2874
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2875
     *                <b>ENT_SUBSTITUTE</b> flags are set
2876
     */
2877 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as given; anything else is normalized first.
        if ($encoding === 'UTF-8' || $encoding === 'CP850') {
            $target_encoding = $encoding;
        } else {
            $target_encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $target_encoding, $double_encode);
    }
2894
2895
    /**
2896
     * Checks whether iconv is available on the server.
2897
     *
2898
     * @return bool
2899
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2900
     */
2901
    public static function iconv_loaded(): bool
    {
        // Ask the engine whether the "iconv" extension is loaded.
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
2905
2906
    /**
2907
     * alias for "UTF8::decimal_to_chr()"
2908
     *
2909
     * @param mixed $int
2910
     *
2911
     * @return string
2912
     *
2913
     * @see UTF8::decimal_to_chr()
2914
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
2915
     */
2916 4
    public static function int_to_chr($int): string
    {
        // Deprecated alias: the value is forwarded unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2920
2921
    /**
2922
     * Converts Integer to hexadecimal U+xxxx code point representation.
2923
     *
2924
     * INFO: opposite to UTF8::hex_to_int()
2925
     *
2926
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
2927
     * @param string $prefix [optional]
2928
     *
2929
     * @return string the prefixed hexadecimal code point, left-padded with zeros to at least four digits
2930
     */
2931 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; longer values are kept as they are.
        $padded_hex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $padded_hex;
    }
2939
2940
    /**
2941
     * Checks whether intl-char is available on the server.
2942
     *
2943
     * @return bool
2944
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2945
     */
2946
    public static function intlChar_loaded(): bool
    {
        // Availability is detected through the existence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
2950
2951
    /**
2952
     * Checks whether intl is available on the server.
2953
     *
2954
     * @return bool
2955
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2956
     */
2957 5
    public static function intl_loaded(): bool
    {
        // Ask the engine whether the "intl" extension is loaded.
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
2961
2962
    /**
2963
     * alias for "UTF8::is_ascii()"
2964
     *
2965
     * @param string $str
2966
     *
2967
     * @return bool
2968
     *
2969
     * @see UTF8::is_ascii()
2970
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
2971
     */
2972 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: the check itself lives in the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
2976
2977
    /**
2978
     * alias for "UTF8::is_base64()"
2979
     *
2980
     * @param string $str
2981
     *
2982
     * @return bool
2983
     *
2984
     * @see UTF8::is_base64()
2985
     * @deprecated <p>please use "UTF8::is_base64()"</p>
2986
     */
2987 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards to is_base64() with its default for empty strings.
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
2991
2992
    /**
2993
     * alias for "UTF8::is_binary()"
2994
     *
2995
     * @param mixed $str
2996
     * @param bool  $strict
2997
     *
2998
     * @return bool
2999
     *
3000
     * @see UTF8::is_binary()
3001
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3002
     */
3003 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated alias: both arguments are forwarded unchanged to is_binary().
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3007
3008
    /**
3009
     * alias for "UTF8::is_bom()"
3010
     *
3011
     * @param string $utf8_chr
3012
     *
3013
     * @return bool
3014
     *
3015
     * @see UTF8::is_bom()
3016
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3017
     */
3018 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forwards to is_bom().
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3022
3023
    /**
3024
     * alias for "UTF8::is_html()"
3025
     *
3026
     * @param string $str
3027
     *
3028
     * @return bool
3029
     *
3030
     * @see UTF8::is_html()
3031
     * @deprecated <p>please use "UTF8::is_html()"</p>
3032
     */
3033 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forwards to is_html().
        $is_html = self::is_html($str);

        return $is_html;
    }
3037
3038
    /**
3039
     * alias for "UTF8::is_json()"
3040
     *
3041
     * @param string $str
3042
     *
3043
     * @return bool
3044
     *
3045
     * @see UTF8::is_json()
3046
     * @deprecated <p>please use "UTF8::is_json()"</p>
3047
     */
3048
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forwards to is_json().
        $is_json = self::is_json($str);

        return $is_json;
    }
3052
3053
    /**
3054
     * alias for "UTF8::is_utf16()"
3055
     *
3056
     * @param mixed $str
3057
     *
3058
     * @return false|int
3059
     *                   <strong>false</strong> if it's not UTF-16,<br>
3060
     *                   <strong>1</strong> for UTF-16LE,<br>
3061
     *                   <strong>2</strong> for UTF-16BE
3062
     *
3063
     * @see UTF8::is_utf16()
3064
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
3065
     */
3066 2
    public static function isUtf16($str)
    {
        // Deprecated alias: the result of is_utf16() is handed back as is.
        $detected = self::is_utf16($str);

        return $detected;
    }
3070
3071
    /**
3072
     * alias for "UTF8::is_utf32()"
3073
     *
3074
     * @param mixed $str
3075
     *
3076
     * @return false|int
3077
     *                   <strong>false</strong> if it's not UTF-32,
3078
     *                   <strong>1</strong> for UTF-32LE,
3079
     *                   <strong>2</strong> for UTF-32BE
3080
     *
3081
     * @see UTF8::is_utf32()
3082
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
3083
     */
3084 2
    public static function isUtf32($str)
    {
        // Deprecated alias: the result of is_utf32() is handed back as is.
        $detected = self::is_utf32($str);

        return $detected;
    }
3088
3089
    /**
3090
     * alias for "UTF8::is_utf8()"
3091
     *
3092
     * @param string $str
3093
     * @param bool   $strict
3094
     *
3095
     * @return bool
3096
     *
3097
     * @see UTF8::is_utf8()
3098
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3099
     */
3100 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated alias: both arguments are forwarded unchanged to is_utf8().
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3104
3105
    /**
3106
     * Returns true if the string contains only alphabetic chars, false otherwise.
3107
     *
3108
     * @param string $str
3109
     *
3110
     * @return bool
3111
     *              Whether or not $str contains only alphabetic chars
3112
     */
3113 10
    public static function is_alpha(string $str): bool
    {
        // The same pattern is used by both the mbstring path and the fallback.
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3122
3123
    /**
3124
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3125
     *
3126
     * @param string $str
3127
     *
3128
     * @return bool
3129
     *              Whether or not $str contains only alphanumeric chars
3130
     */
3131 13
    public static function is_alphanumeric(string $str): bool
    {
        // The same pattern is used by both the mbstring path and the fallback.
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3140
3141
    /**
3142
     * Checks if a string is 7 bit ASCII.
3143
     *
3144
     * @param string $str <p>The string to check.</p>
3145
     *
3146
     * @return bool
3147
     *              <strong>true</strong> if it is ASCII<br>
3148
     *              <strong>false</strong> otherwise
3149
     */
3150 8
    public static function is_ascii(string $str): bool
    {
        // The check itself is implemented in the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3154
3155
    /**
3156
     * Returns true if the string is base64 encoded, false otherwise.
3157
     *
3158
     * @param mixed|string $str                   <p>The input string.</p>
3159
     * @param bool         $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3160
     *
3161
     * @return bool whether or not $str is base64 encoded
3162
     */
3163 16
    public static function is_base64($str, $empty_string_is_valid = false): bool
    {
        // An empty string only counts as base64 when the caller explicitly allows it.
        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: re-encoding the decoded bytes must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3184
3185
    /**
3186
     * Check if the input is binary (heuristic check, this looks like a hack).
3187
     *
3188
     * @param mixed $input
3189
     * @param bool  $strict
3190
     *
3191
     * @return bool
3192
     */
3193 39
    public static function is_binary($input, bool $strict = false): bool
3194
    {
3195 39
        $input = (string) $input;
3196 39
        if ($input === '') {
3197 10
            return false;
3198
        }
3199
3200 39
        if (\preg_match('~^[01]+$~', $input)) {
3201 13
            return true;
3202
        }
3203
3204 39
        $ext = self::get_file_type($input);
3205 39
        if ($ext['type'] === 'binary') {
3206 7
            return true;
3207
        }
3208
3209 38
        $test_length = \strlen($input);
3210 38
        $test_null_counting = \substr_count($input, "\x0", 0, $test_length);
3211 38
        if (($test_null_counting / $test_length) > 0.25) {
3212 15
            return true;
3213
        }
3214
3215 34
        if ($strict === true) {
3216 34
            if (self::$SUPPORT['finfo'] === false) {
3217
                throw new \RuntimeException('ext-fileinfo: is not installed');
3218
            }
3219
3220
            /** @noinspection PhpComposerExtensionStubsInspection */
3221 34
            $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);
3222 34
            if ($finfo_encoding && $finfo_encoding === 'binary') {
3223 15
                return true;
3224
            }
3225
        }
3226
3227 30
        return false;
3228
    }
3229
3230
    /**
     * Check if a file is binary by inspecting its first 512 bytes.
     *
     * The bytes are handed to self::is_binary() in strict mode. If the file
     * cannot be opened, or nothing was read, the result is false.
     *
     * @param string $file <p>Path passed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // Stays empty when fopen() did not yield a resource.
        $head = '';
        if (\is_resource($handle)) {
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        return $head === '' ? false : self::is_binary($head, true);
    }
3254
3255
    /**
     * Tells whether the string consists of whitespace chars only.
     *
     * @param string $str <p>The string to inspect.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $space_pattern = '^[[:space:]]*$';

        // Without mbstring support the check is delegated to self::str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $space_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($space_pattern, $str);
    }
3272
3273
    /**
     * Checks if the given string is exactly equal to any known "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you want to check for a BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM sequences are the keys of self::$BOM, so a direct key lookup is
        // enough; non-string input can never be identical to one of them.
        return \is_string($str) && \array_key_exists($str, self::$BOM);
    }
3294
3295
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it converts to boolean FALSE
     * (e.g. '', '0', 0, null or an empty array).
     *
     * @param mixed $str <p>The value to inspect.</p>
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // For an existing variable empty() is the negated boolean conversion.
        return (bool) $str === false;
    }
3309
3310
    /**
     * Tells whether the string consists of hexadecimal chars only.
     *
     * @param string $str <p>The string to inspect.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $xdigit_pattern = '^[[:xdigit:]]*$';

        // Without mbstring support the check is delegated to self::str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $xdigit_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($xdigit_pattern, $str);
    }
3327
3328
    /**
     * Check whether the string contains at least one HTML tag.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Looks for an opening, closing or self-closing tag with optional attributes.
        $tag_matches = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $tag_matches);

        return \count($tag_matches) > 0;
    }
3350
3351
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * The string is decoded via self::json_decode(); the final verdict is
     * whether json_last_error() reports no error.
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Accept only JSON that decodes to an array or object.</p>
     *
     * @throws \RuntimeException if $str is not empty and ext-json is flagged as unavailable
     *
     * @return bool
     */
    public static function is_json(
        string $str,
        $only_array_or_object_results_are_valid = true
    ): bool {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only acceptable for the literal "null" (compared case-insensitively).
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($only_array_or_object_results_are_valid === true) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3389
3390
    /**
     * Tells whether the string consists of lower case chars only.
     *
     * @param string $str <p>The string to inspect.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $lower_pattern = '^[[:lower:]]*$';

        // Without mbstring support the check is delegated to self::str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $lower_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($lower_pattern, $str);
    }
3404
3405
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(); no classes are allowed during
     * the probe, so serialized objects are detected without instantiating
     * their classes.
     *
     * @param string $str <p>The string to inspect.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // unserialize() returns false for a serialized boolean false as well as
        // on failure, so that one value is recognized up front.
        if ($str === 'b:0;') {
            return true;
        }

        // The input may come from an untrusted source: "allowed_classes" => false
        // keeps unserialize() from instantiating arbitrary classes (objects become
        // __PHP_Incomplete_Class), while the result is still non-false for valid data.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3424
3425
    /**
     * Tells whether the string consists of upper case chars only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $upper_pattern = '^[[:upper:]]*$';

        // Without mbstring support the check is delegated to self::str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $upper_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($upper_pattern, $str);
    }
3443
3444
    /**
     * Check if the string is UTF-16.
     *
     * For each byte order the string is converted to UTF-8 and round-tripped
     * back; a byte order whose round trip is stable gets a score, and the byte
     * order with the higher score wins. Equal scores mean "not UTF-16".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>If true, input that self::is_binary() (strict) does not regard as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Computed lazily, at most once per successful round trip that still finds it empty.
        $str_chars = [];
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (\array_keys($scores) as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // The conversion must survive a round trip, otherwise this byte order scores nothing.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys of the count_chars() result are needed; iterating them by
            // value avoids taking a reference into a temporary function result.
            // NOTE(review): in_array() searches the *values* of $str_chars - confirm
            // that comparing these keys against those values is the intended heuristic.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
            return false;
        }

        return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
    }
3521
3522
    /**
     * Check if the string is UTF-32.
     *
     * For each byte order the string is converted to UTF-8 and round-tripped
     * back; a byte order whose round trip is stable gets a score, and the byte
     * order with the higher score wins. Equal scores mean "not UTF-32".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>If true, input that self::is_binary() (strict) does not regard as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Computed lazily, at most once per successful round trip that still finds it empty.
        $str_chars = [];
        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (\array_keys($scores) as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // The conversion must survive a round trip, otherwise this byte order scores nothing.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys of the count_chars() result are needed; iterating them by
            // value avoids taking a reference into a temporary function result.
            // NOTE(review): in_array() searches the *values* of $str_chars - confirm
            // that comparing these keys against those values is the intended heuristic.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
            return false;
        }

        return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
    }
3599
3600
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the result is true
     * only if every element passes, which also makes an empty array valid.
     * Any other input is cast to string and handed to self::is_utf8_string().
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Elements are only read, so they are iterated by value.
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
3622
3623
    /**
     * Decodes a JSON string.
     *
     * The input is first passed through self::filter() and then handed to
     * PHP's native json_decode().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        The native function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed on unchanged to json_decode().
     *                        </p>
     *
     * @throws \RuntimeException if ext-json is flagged as unavailable
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3673
3674
    /**
     * Returns the JSON representation of a value.
     *
     * The value is first passed through self::filter() and then handed to
     * PHP's native json_encode().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if ext-json is flagged as unavailable
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3723
3724
    /**
     * Checks whether JSON is available on the server,
     * i.e. whether the function json_decode() exists.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
3734
3735
    /**
     * Makes string's first char lowercase.
     *
     * For UTF-8 without a language and without length-keeping the native mb_*
     * functions are used directly; every other combination goes through
     * self::strtolower().
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $first_char = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $rest;
        }

        $rest = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // No language-specific handling and no length-keeping requested: plain mb_strtolower() will do.
        if ($lang === null && $try_to_keep_the_string_length === false) {
            return \mb_strtolower($first_char) . $rest;
        }

        return self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $rest;
    }
3791
3792
    /**
     * Alias for "UTF8::lcfirst()"; all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3821
3822
    /**
     * Lowercase the first char of all words in the string.
     *
     * The string is split via self::str_to_words(), every non-empty part that
     * is not listed in $exceptions is passed through self::lcfirst(), and the
     * parts are joined again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do not start
     *                                                   a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Only a really empty string short-circuits; a loose check would also
        // swallow the valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        foreach ($words as &$word) {
            // Falsy parts ('' and '0') have nothing to lowercase and stay as they are.
            if (!$word) {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }
        }
        // Break the reference so $word cannot alias the last element any longer.
        unset($word);

        return \implode('', $words);
    }
3868
3869
    /**
     * Alias for "UTF8::lcfirst()"; all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3898
3899
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string means whitespace.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness test, so that the char list "0" is honoured.
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                // mb_ereg patterns have no delimiter, hence no delimiter argument for preg_quote().
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '^[' . \preg_quote($chars) . ']+';
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_chars) {
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3935
3936
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before the code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code point was found
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if ($codepoints === []) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
3958
3959
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest value of "UTF8::chr_size_list()" for the string, 0 if that list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return $sizes === [] ? 0 : (int) \max($sizes);
    }
3976
3977
    /**
     * Checks whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
3987
3988
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before the code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null if no code point was found
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if ($codepoints === []) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
4010
4011
    /**
     * Deprecated alias of "UTF8::normalize_encoding()".
     *
     * Both arguments are forwarded unchanged and the result of that call is returned.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4026
4027
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty / "0" / "1" input => $fallback; a few exact spellings
     * (e.g. "UTF8", "ISO", "HTML") => their canonical name; previously resolved names
     * => cached result; names contained in the "encodings" data list => returned as
     * given; everything else is upper-cased and looked up (without non-alphanumeric
     * chars) in a table of known aliases. Unknown names are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // "" and "0" are the falsy strings; "0" and "1" are only
        // a fallback, for non "strict_types" usage ...
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        // exact spellings that are answered without touching the cache or the encodings list
        $shortcuts = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($shortcuts[$encoding])) {
            return $shortcuts[$encoding];
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // the list of known encoding names is loaded once, on first use
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // alias lookup: the key is the upper-cased name reduced to [A-Z0-9]
        $encoding_upper = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding_upper);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown aliases fall through as the upper-cased input
        $normalized = $equivalences[$alias_key] ?? $encoding_upper;

        // cached under the spelling the caller used, so the next call returns early
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4178
4179
    /**
     * Standardize line ending to unix-like.
     *
     * "\r\n" pairs are converted first, then every remaining "\r" is converted to "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        $without_crlf = \str_replace("\r\n", "\n", $str);

        return \str_replace("\r", "\n", $without_crlf);
    }
4190
4191
    /**
     * Normalize some MS Word special characters.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4202
4203
    /**
     * Normalize the whitespace.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_whitespace()".
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4224
4225
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-call static cache, the pre-built "ord" data table,
     * "IntlChar::ord()" (if supported), and finally a manual decoding of the
     * first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache is keyed by the raw input char plus the (normalized) encoding name
        $cache_key = $chr . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // the lookup table is loaded once, on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // unpack() returns a 1-based list of the byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // 4-byte sequence
            $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // 3-byte sequence
            $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // 2-byte sequence
            $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte, or not enough bytes for the announced sequence length
            $code = $lead;
        }

        return $CHAR_CACHE[$cache_key] = (int) $code;
    }
4310
4311
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * Uses "mb_parse_str()" when "mbstring" is supported, otherwise the native "parse_str()".
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 === true ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        return $parsed !== false && $result !== [];
    }
4343
4344
    /**
     * Checks if the "u" pattern modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty
     * string; errors are silenced so an unsupported modifier simply yields false.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matched = @\preg_match('//u', '');

        return (bool) $matched;
    }
4356
4357
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * With $use_ctype enabled, both bounds are first tested as decimal digits, then as
     * hexadecimal digits; otherwise (or if neither matches) a bound is read as a number
     * via "is_numeric" (only without ctype) or as a character via "UTF8::ord()".
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but "ext-ctype" is not available
     *
     * @return string[] empty if one of the bounds is empty or resolves to code point 0
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions interpret a small integer argument as an ASCII code
        // (deprecated since PHP 8.1), so both checks get the string form; this keeps
        // e.g. 71 and "71" on the same code path.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // the end bound is interpreted the same way as the start bound
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            // a float $step makes \range() return floats, hence the int cast
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4439
4440
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * Strings without any of the markers "&", "%", "+" or "\u" skip the decoding loop
     * and are only passed through "UTF8::fix_simple_utf8()".
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $has_encoded_marker = \strpos($str, '&') !== false
                              ||
                              \strpos($str, '%') !== false
                              ||
                              \strpos($str, '+') !== false
                              ||
                              \strpos($str, '\u') !== false;

        if (!$has_encoded_marker) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one pass = to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8;
        // with $multi_decode the pass is repeated until the string stops changing
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entity_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entity_decoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4497
4498
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in $delimiter and always run with the "u" modifier;
     * the option string "msr" is mapped to "ms".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4531
4532
    /**
     * Deprecated alias of "UTF8::remove_bom()".
     *
     * @param string $str
     *
     * @return string
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
4546
4547
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked in turn against the start of the
     * (possibly already shortened) string, and a matching BOM is cut off.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);

        foreach (self::$BOM as $bom => $bom_length) {
            if (\strpos($str, $bom, 0) !== 0) {
                continue;
            }

            /** @var false|string $rest - needed for PhpStan (stubs error) */
            $rest = \substr($str, $bom_length, $remaining_length);
            if ($rest === false) {
                return '';
            }

            $remaining_length -= (int) $bom_length;
            $str = (string) $rest;
        }

        return $str;
    }
4577
4578
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Each run of directly consecutive repetitions of a searched string is
     * collapsed into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // anything that is neither a string nor an array leaves $str untouched
        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                // The replacement is the captured group, not $item itself: preg_replace()
                // expands "$n" / "\n" inside a replacement string, so an item such as "$0"
                // would otherwise be replaced by the whole match and never be collapsed.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
4601
4602
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which should
     *                               not be stripped. Default: '' (empty string)
     *                               </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
4616
4617
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "#" is the delimiter; the alternatives must start directly with "\r\n"
        // (a leading "/" would make the first alternative match "slash + CRLF" only).
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4629
4630
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * Thin wrapper: the work is delegated to "ASCII::remove_invisible_characters()".
     *
     * @param string $str
     * @param bool   $url_encoded
     * @param string $replacement
     *
     * @return string
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = true,
        string $replacement = ''
    ): string {
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $cleaned;
    }
4654
4655
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Explicit empty check instead of truthiness: the string "0" is a valid
        // prefix, but PHP evaluates it as false.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // the byte-wise prefix match is confirmed, now cut by character count
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4689
4690
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Explicit empty check instead of truthiness: the string "0" is a valid
        // suffix, but PHP evaluates it as false.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // the byte-wise suffix match is confirmed, now cut by character count
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4725
4726
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Case-sensitive replacing uses the native "str_replace()", case-insensitive
     * replacing is delegated to "UTF8::str_ireplace()".
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4748
4749
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Case-sensitive replacing uses the native "str_replace()", case-insensitive
     * replacing is delegated to "UTF8::str_ireplace()".
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4771
4772
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" only accepts "none" / "long" / "entity" or a code
            // point, never an arbitrary replacement string. For a non-empty replacement the
            // invalid bytes are therefore turned into U+FFFD first; the str_replace() below
            // then swaps every U+FFFD for the requested replacement.
            $save = \mb_substitute_character();
            \mb_substitute_character($replacement_char === '' ? 'none' : 0xFFFD);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the previous substitution setting
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
4821
4822
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * If $chars is null or an empty string, trailing whitespace ("\s") is removed,
     * otherwise every trailing character that is part of $chars is removed.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly: a plain truthiness check would treat the
        // character list "0" as "no list given" and strip whitespace instead.
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '[' . \preg_quote($chars) . ']+$';
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_chars) {
            // here the pattern is wrapped in "/" delimiters, so quote them as well
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4858
4859
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" line per entry of the internal support-array,
     * wrapped into a "<pre>" element.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        $output = '<pre>';

        foreach (self::$SUPPORT as $key => $value) {
            $output .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        echo $output . '</pre>';
    }
4873
4874
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * An empty input yields an empty string. With $keep_ascii_chars enabled,
     * input that "ASCII::is_ascii()" accepts is returned unchanged.
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        $keep_as_is = $keep_ascii_chars === true && ASCII::is_ascii($char) === true;
        if ($keep_as_is) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
4902
4903
    /**
     * Replace every run of $tab_length spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that are replaced by a single tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
4921
4922
    /**
     * alias for "UTF8::str_split()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $clean_utf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
4941
4942
    /**
     * alias for "UTF8::str_starts_with()"
     *
     * Both arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with = self::str_starts_with($haystack, $needle);

        return $starts_with;
    }
4957
4958
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * Steps: lower-case the first character of the trimmed string, strip leading
     * dashes / underscores, upper-case the character after each run of dashes /
     * underscores / whitespace (dropping the run itself) and finally upper-case
     * every run of numbers together with the character that follows it.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        $is_common_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_common_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper()" is only used if no language specific handling was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        /**
         * Upper-case helper shared by both callbacks below.
         *
         * @param string $text
         * @param bool   $clean
         *
         * @return string
         */
        $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if ($use_mb_functions !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                // a separator-run at the very end of the string has no following character
                if (!isset($match[1])) {
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5043
5044
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the capitalize-helper is applied
     * with a space as delimiter and afterwards with a dash as delimiter.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $capitalized_by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalized_by_space, '-');
    }
5062
5063
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * The case-sensitive search uses "strpos()", the case-insensitive one "mb_stripos()".
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5085
5086
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles array or an empty needle always
     * result in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains all $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare as string, so that e.g. the needle "0" is not handled as "empty"
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail, but a hit must not end
            // the loop: the remaining needles still need to be checked
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5121
5122
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped. An empty $haystack or an empty $needles array
     * always result in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare as string, so that e.g. the needle "0" is not skipped as "empty"
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5164
5165
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "UTF8::str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5179
5180
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * Internally a "-" is placed before the uppercase characters first, then the
     * string is lower-cased and finally every run of dashes, underscores and
     * whitespace is replaced by $delimiter. The "mb_ereg_replace()" functions are
     * used if "mbstring" is available, otherwise "preg_replace()" is used.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // decide once which regex-engine is used for both replacements
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) mark every uppercase character (not at a word boundary) with a leading dash
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lower-case the string
        $use_plain_mb_strtolower = $lang === null
                                   &&
                                   $try_to_keep_the_string_length === false
                                   &&
                                   $encoding === 'UTF-8';
        if ($use_plain_mb_strtolower) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // 3.) replace the separator-runs with the given delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5231
5232
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary input (UTF-32 / UTF-16 detection),
     * ASCII, UTF-8, "mb_detect_encoding()" with a fixed candidate list and
     * finally an "iconv()" round-trip over the known encodings.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $utf32_result = self::is_utf32($str, false);
            foreach ([1 => 'UTF-32LE', 2 => 'UTF-32BE'] as $flag => $name) {
                if ($utf32_result === $flag) {
                    return $name;
                }
            }

            $utf16_result = self::is_utf16($str, false);
            foreach ([1 => 'UTF-16LE', 2 => 'UTF-16BE'] as $flag => $name) {
                if ($utf16_result === $flag) {
                    return $name;
                }
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $encoding_detecting_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $encoding_detecting_order, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);

            // the candidate is accepted if the conversion onto itself keeps the string unchanged
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5353
5354
    /**
     * alias for "UTF8::str_ends_with()"
     *
     * Both arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with = self::str_ends_with($haystack, $needle);

        return $ends_with;
    }
5369
5370
    /**
     * Check if the string ends with the given substring.
     *
     * The comparison is case-sensitive and byte-wise. An empty $needle always
     * matches, while an empty $haystack only matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needle_length = \strlen($needle);

        if ($needle_length === 0) {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -$needle_length);

        return $tail === $needle;
    }
5390
5391
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply never enters the loop
        foreach ($substrings as $substring) {
            $tail = \substr($str, -\strlen($substring));

            if ($tail === $substring) {
                return true;
            }
        }

        return false;
    }
5415
5416
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $has_prefix = $substring !== '' && \strpos($str, $substring) === 0;

        return $has_prefix ? $str : $substring . $str;
    }
5437
5438
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $has_suffix = $str !== ''
                      &&
                      $substring !== ''
                      &&
                      \substr($str, -\strlen($substring)) === $substring;

        if ($has_suffix) {
            return $str;
        }

        return $str . $substring;
    }
5460
5461
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * The "_id" parts are removed before the remaining underscores are replaced,
     * then the result is trimmed and passed to "UTF8::ucfirst()".
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($with_spaces));
    }
5485
5486
    /**
     * alias for "UTF8::str_istarts_with()"
     *
     * Both arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with = self::str_istarts_with($haystack, $needle);

        return $starts_with;
    }
5501
5502
    /**
     * alias for "UTF8::str_iends_with()"
     *
     * Both arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with = self::str_iends_with($haystack, $needle);

        return $ends_with;
    }
5517
5518
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * An empty $needle always matches, while an empty $haystack only matches an
     * empty $needle. Otherwise the last "strlen($needle)" bytes of $haystack are
     * compared with $needle via "UTF8::strcasecmp()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        $needle_length = \strlen($needle);

        if ($needle_length === 0) {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -$needle_length);

        return self::strcasecmp($tail, $needle) === 0;
    }
5538
5539
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * Every substring is checked via "UTF8::str_iends_with()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply never enters the loop
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5564
5565
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * This is an alias: all arguments are passed through to "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
5594
5595
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * This is an alias: all arguments are passed through to "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
5625
5626
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * This is an alias: all arguments are passed through to "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
5655
5656
    /**
     * Finds the index of the last occurrence of $needle in $str.
     *
     * Thin, deprecated alias: all arguments are forwarded unchanged to
     * UTF8::strrpos() and its result is returned as-is. Offsets may be
     * negative to count from the last character in the string.
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param int    $offset   [optional] <p>Offset from which the search starts. Default: 0</p>
     * @param string $encoding [optional] <p>Charset passed on to UTF8::strrpos(). Default: 'UTF-8'</p>
     *
     * @return false|int
     *                   <p>The <strong>index</strong> of the last occurrence, or <strong>false</strong> if not found.</p>
     *
     * @see UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $last_index = self::strrpos($str, $needle, $offset, $encoding);

        return $last_index;
    }
5686
5687
    /**
     * Inserts $substring into $str at character position $index.
     *
     * If $index lies beyond the end of the string, $str is returned unchanged.
     *
     * @param string $str       <p>The string that receives the insertion.</p>
     * @param string $substring <p>The text to insert.</p>
     * @param int    $index     <p>Character position at which $substring is placed.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: everything goes through the encoding-aware helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $total = (int) self::strlen($str, $encoding);
            if ($index > $total) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $total, $encoding);

            return $head . $substring . $tail;
        }

        // Fast path for UTF-8: use the native "mb_" functions directly.
        $total = (int) \mb_strlen($str);
        if ($index > $total) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $total);

        return $head . $substring . $tail;
    }
5726
5727
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search term is turned into a quoted, case-insensitive unicode
     * regex and the replacement is applied via preg_replace(). Replacement
     * strings are taken literally: "$" and "\" are escaped first, so text
     * such as "$1" or "\1" is inserted verbatim instead of being interpreted
     * as a regex back-reference.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search term never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value (or array of values).
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements (the raw preg_replace() result)
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Build one regex per search term (no by-reference loop, so no dangling reference).
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/' // a pattern that can never match
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() treats "$n" / "\n" in the replacement as back-references;
        // escape both meta characters so the replacement is inserted literally.
        $quote_replacement = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replacement = \is_array($replace)
            ? \array_map($quote_replacement, $replace)
            : $quote_replacement($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5771
5772
    /**
     * Replaces $search at the beginning of the string with $replacement
     * (case-insensitive).
     *
     * The prefix is matched with a unicode-aware, case-insensitive regex so
     * that non-ASCII letters are compared case-insensitively as well. If
     * $search is not valid UTF-8 (or the regex fails, e.g. because $str is
     * not valid UTF-8), a byte-wise ASCII case-insensitive comparison is
     * used instead.
     *
     * An empty $search keeps the historical behaviour of this method:
     * $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            // nothing to match against
            return '';
        }

        // "//u" only validates the UTF-8 of $search, so the real pattern below compiles without warnings.
        if (\preg_match('//u', $search) === 1) {
            $found = \preg_match('/\A' . \preg_quote($search, '/') . '/ui', $str, $match);

            if ($found === 1) {
                // cut by the length of the matched text: case variants may differ in byte length
                return $replacement . \substr($str, \strlen($match[0]));
            }

            if ($found === 0) {
                return $str;
            }
        }

        // Fallback: compare only the leading bytes instead of scanning the whole string.
        $search_length = \strlen($search);
        if (\strncasecmp($str, $search, $search_length) === 0) {
            return $replacement . \substr($str, $search_length);
        }

        return $str;
    }
5803
5804
    /**
     * Replaces $search at the ending of the string with $replacement
     * (case-insensitive).
     *
     * The suffix is matched with a unicode-aware, case-insensitive regex so
     * that non-ASCII letters are compared case-insensitively as well. If
     * $search is not valid UTF-8 (or the regex fails, e.g. because $str is
     * not valid UTF-8), a byte-wise ASCII case-insensitive comparison is
     * used instead.
     *
     * A $search that is longer than $str simply does not match; no negative
     * out-of-range offset is handed to a native string function anymore.
     *
     * An empty $search appends $replacement to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            // nothing to match against
            return '';
        }

        // "//u" only validates the UTF-8 of $search, so the real pattern below compiles without warnings.
        if (\preg_match('//u', $search) === 1) {
            $found = \preg_match(
                '/' . \preg_quote($search, '/') . '\z/ui',
                $str,
                $match,
                \PREG_OFFSET_CAPTURE
            );

            if ($found === 1) {
                // $match[0][1] is the byte offset at which the matched suffix starts
                return \substr($str, 0, $match[0][1]) . $replacement;
            }

            if ($found === 0) {
                return $str;
            }
        }

        // Fallback: byte-wise comparison of the trailing bytes only.
        $str_length = \strlen($str);
        $search_length = \strlen($search);
        if (
            $search_length <= $str_length
            &&
            \substr_compare($str, $search, -$search_length, $search_length, true) === 0
        ) {
            return \substr($str, 0, $str_length - $search_length) . $replacement;
        }

        return $str;
    }
5835
5836
    /**
     * Checks, ignoring case, whether $haystack begins with $needle.
     *
     * An empty $needle always counts as a match; otherwise an empty
     * $haystack never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // a match only counts when it is found at index 0
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
5856
5857
    /**
     * Checks whether the string begins with at least one of the given $substrings.
     *
     * - case-insensitive
     * - an empty $str or an empty list of $substrings yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5885
5886
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * The separator is located via self::stripos(); the given $encoding is
     * forwarded to that search so that the offset and the following substring
     * extraction work on the same charset.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The text after the separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.</p>
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5923
5924
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * The separator is located via self::strripos(); the given $encoding is
     * forwarded to that search so that the offset and the following substring
     * extraction work on the same charset.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The text after the separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.</p>
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // same native fast path as str_isubstr_after_first_separator()
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5961
5962
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * The separator is located via self::stripos(); the given $encoding is
     * forwarded to that search so that the offset and the following substring
     * extraction work on the same charset.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The text before the separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.</p>
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5991
5992
    /**
     * Returns everything in front of the last (case-insensitive) occurrence
     * of the separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The text before the separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.</p>
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // UTF-8 uses the native "mb_" functions, everything else the class helpers.
        $is_utf8 = $encoding === 'UTF-8';

        $last_pos = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($last_pos === false) {
            return '';
        }

        return $is_utf8
            ? (string) \mb_substr($str, 0, $last_pos)
            : (string) self::substr($str, 0, $last_pos, $encoding);
    }
6026
6027
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The work is delegated to self::stristr(); a result of false is
     * converted into an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
6063
6064
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * The work is delegated to self::strrichr(); a result of false is
     * converted into an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
6100
6101
    /**
     * Returns the trailing $n characters of the string.
     *
     * An empty string is returned for an empty $str or when $n is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // a negative start counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6127
6128
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged.
     * Otherwise the string is cut so that the kept part plus $str_add_on is
     * $length characters long. If $str_add_on alone is as long as (or longer
     * than) $length, nothing of $str is kept and only $str_add_on is
     * returned; the cut length is never allowed to become negative.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // A negative length would make mb_substr() cut from the end instead
            // of the start and the result would exceed the limit.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamping as in the UTF-8 branch
        $keep = \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6165
6166
    /**
     * Limits the string to $length characters without cutting a word in half.
     *
     * A string that already fits is returned unchanged. Otherwise:
     * - if the character at position ($length - 1) is a space, the text in
     *   front of it is returned with $str_add_on appended;
     * - else the string is cut to $length characters and everything after the
     *   last space is dropped before $str_add_on is appended;
     * - if no space is left (nothing would remain), the first ($length - 1)
     *   characters are used instead.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $str = \mb_substr($str, 0, $length);

            // keep only what stands in front of the last space
            $last_space = \strrpos($str, ' ');
            $new_str = $last_space === false ? '' : \substr($str, 0, $last_space);

            if ($new_str === '') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            return $new_str . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        $str = self::substr($str, 0, $length, $encoding);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($str === false) {
            return '' . $str_add_on;
        }

        // keep only what stands in front of the last space
        $last_space = \strrpos($str, ' ');
        $new_str = $last_space === false ? '' : \substr($str, 0, $last_space);

        if ($new_str === '') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $new_str . $str_add_on;
    }
6232
6233
    /**
     * Determines the longest leading run of characters shared by $str1 and $str2.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first difference.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            for ($pos = 0; $pos < $limit; ++$pos) {
                $char = \mb_substr($str1, $pos, 1);

                if ($char === false || $char !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = self::substr($str1, $pos, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
6294
6295
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first (in $str1).
     *
     * Uses the dynamic programming approach described at
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * Both strings are split into characters once up front, and only the
     * previous and the current row of the DP table are kept in memory.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with its $str being the longest common substring
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // native "mb_" functions for UTF-8, the class helpers for everything else
        $use_mb = $encoding === 'UTF-8';

        if ($use_mb) {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Extract every character exactly once instead of once per table cell.
        $split = static function (string $input, int $length) use ($use_mb, $encoding): array {
            $chars = [];
            for ($pos = 0; $pos < $length; ++$pos) {
                $chars[] = $use_mb
                    ? \mb_substr($input, $pos, 1)
                    : self::substr($input, $pos, 1, $encoding);
            }

            return $chars;
        };
        $str_chars = $split($str1, $str_length);
        $other_chars = $split($str2, $other_length);

        $len = 0; // length of the best match found so far
        $end = 0; // position (in $str1) right behind the best match
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    // extend the common run that ended one character earlier in both strings
                    $run = $previous_row[$j - 1] + 1;
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6382
6383
    /**
     * Determines the longest trailing run of characters shared by $str1 and $str2.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first difference.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            for ($back = 1; $back <= $limit; ++$back) {
                // a negative start counts from the end of the string
                $char = \mb_substr($str1, -$back, 1);

                if ($char === false || $char !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($back = 1; $back <= $limit; ++$back) {
            $char = self::substr($str1, -$back, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
        }

        return $suffix;
    }
6447
6448
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is given without delimiters and is evaluated with the "u"
     * (unicode) modifier. A "/" inside the pattern that is not already
     * escaped gets escaped automatically, so it cannot terminate the regex
     * delimiter used internally.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // Escape every "/" that is preceded by an even number of backslashes
        // (= not escaped yet); already escaped slashes stay untouched.
        $escaped = \preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\\\/', $pattern);
        if ($escaped === null) {
            // escaping failed: fall back to the pattern as given
            $escaped = $pattern;
        }

        return (bool) \preg_match('/' . $escaped . '/u', $str);
    }
6460
6461
    /**
     * Tells whether a character exists at the given index. Negative offsets
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // non-negative: must be below the length; negative: may reach back at most $length characters
        return $offset >= 0
            ? $length > $offset
            : $length >= \abs($offset);
    }
6483
6484
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * The bounds check measures the string with the same $encoding that is
     * used afterwards to fetch the character.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // init
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6513
6514
    /**
     * Pad a string to a given number of characters with another string.
     *
     * The input is returned unchanged if $pad_length is 0, if $pad_string is
     * empty or if the input already has more than $pad_length characters.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is not an integer and not one of 'left', 'right' or 'both'
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // plain UTF-8 goes straight to the "mb_*" functions, everything else
        // is routed through the encoding-aware helpers of this class
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $count_chars = static function (string $value) use ($is_utf8, $encoding): int {
            return $is_utf8
                ? (int) \mb_strlen($value)
                : (int) self::strlen($value, $encoding);
        };

        $take_chars = static function (string $value, int $chars) use ($is_utf8, $encoding): string {
            return $is_utf8
                ? (string) \mb_substr($value, 0, $chars)
                : (string) self::substr($value, 0, $chars, $encoding);
        };

        $str_length = $count_chars($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        $missing = $pad_length - $str_length;
        $pre = '';
        $post = '';

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd number of missing characters puts the extra one on the right
            $left = (int) \floor($missing / 2);
            $right = (int) \ceil($missing / 2);

            $pre = $take_chars(\str_repeat($pad_string, $left), $left);
            $post = $take_chars(\str_repeat($pad_string, $right), $right);
        } else {
            // repeat the pad string often enough, then cut it to the exact size
            $filler = $take_chars(
                \str_repeat($pad_string, (int) \ceil($missing / $count_chars($pad_string))),
                $missing
            );

            if ($pad_type === \STR_PAD_LEFT) {
                $pre = $filler;
            } else {
                // STR_PAD_RIGHT and every unknown integer pad on the right
                $post = $filler;
            }
        }

        return $pre . $str . $post;
    }
6676
6677
    /**
     * Pads the string on both sides until it has the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
6703
6704
    /**
     * Pads the beginning of the string until it has the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
6730
6731
    /**
     * Pads the end of the string until it has the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
6757
6758
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6783
6784
    /**
     * Replace all occurrences of the search string with the replacement string.
     *
     * INFO: This is only a wrapper for the native "str_replace()", which the
     * original author considers to be UTF-8 safe already.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        return \str_replace($search, $replace, $subject, $count);
    }
6829
6830
    /**
     * Replaces $search with $replacement, but only if $str starts with $search.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // NOTE(review): an empty $search *appends* the replacement, exactly
            // like "str_replace_ending()" does - confirm whether prepending
            // was intended here.
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // byte-wise prefix comparison, an empty $str can never match here
        if (\strncmp($str, $search, $search_length) === 0) {
            return $replacement . \substr($str, $search_length);
        }

        return $str;
    }
6864
6865
    /**
     * Replaces $search with $replacement, but only if $str ends with $search.
     *
     * An empty $search appends the replacement to the string.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte offset at which $search has to start in order to be the suffix.
        // It is negative if $search is longer than $str: such an offset must
        // never reach "strpos()", because it is out of range there (warning
        // on PHP 7, ValueError on PHP 8).
        $offset = \strlen($str) - \strlen($search);

        if ($offset >= 0 && \substr($str, $offset) === $search) {
            return (string) \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
6899
6900
    /**
     * Replace only the first occurrence of "$search" with "$replace".
     *
     * The subject is returned unchanged if "$search" is not found.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, (int) self::strlen($search));
    }
6932
6933
    /**
     * Replace only the last occurrence of "$search" with "$replace".
     *
     * The subject is returned unchanged if "$search" is not found.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, (int) self::strlen($search));
    }
6964
6965
    /**
     * Returns the characters of the string in random order.
     *
     * PS: relies on "shuffle()", which is too weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // plain UTF-8 uses the "mb_*" functions directly, every other encoding
        // is normalized and handled by the helpers of this class
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // shuffle the character positions instead of the characters themselves
        $positions = \range(0, $char_count - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $shuffled_str = '';
        foreach ($positions as $position) {
            $char = $is_utf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
7011
7012
    /**
     * Returns the part of the string that begins at $start and ends right
     * before the index given by $end. Without $end, everything from $start on
     * is returned. A negative $end is counted from the end of the string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // plain UTF-8 uses the "mb_*" functions directly, every other encoding
        // is normalized and handled by the helpers of this class
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // a non-negative end at or before the start describes an empty range
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            $total = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            // no end: the full length is enough to reach the end of the string,
            // negative end: cut "$end" characters off the end of the string
            $length = $end === null ? $total : $total + $end - $start;
        } else {
            $length = $end - $start;
        }

        return $is_utf8
            ? \mb_substr($str, $start, $length)
            : self::substr($str, $start, $length, $encoding);
    }
7061
7062
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every uppercase letter is lowercased
     * and prefixed with "_", ASCII digits are wrapped in "_", and runs of "_"
     * are collapsed to a single one.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers and uppercase letters only: a "|" inside of a character
            // class is a literal pipe and not an alternation, so it must not
            // be part of the class
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // ASCII digits survive the int round trip and are wrapped in "_"
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // A separate "trim whitespace" pattern is not needed: the first
        // pattern already turned every whitespace run into "_".
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7126
7127
    /**
     * Sort all characters of the string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves exactly one entry per distinct code point
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7152
7153
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own and the keys
     * of the array are kept.
     *
     * @param int|int[]|string|string[] $str                     <p>The string to split into array.</p>
     * @param int                       $length                  [optional] <p>Max character length of each array
     *                                                           element.</p>
     * @param bool                      $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                           "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input, an empty array for an empty input
     *               or a $length smaller than 1.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // assign by key instead of iterating by reference, so that no
            // dangling reference to the last element is left behind
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (
            $try_to_use_mb_functions === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $i_max = \mb_strlen($str);
            if ($i_max <= 127) {
                // short strings: fetch the characters one by one
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let the regex engine split in a single pass
                $return_array = [];
                \preg_match_all('/./us', $str, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $str, $return_array);
            $ret = $return_array[0] ?? [];
        } else {
            // fallback: decode the UTF-8 sequences by hand, a lead byte that is
            // not followed by the expected continuation bytes is skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // the chunk is taken by value: "array_map()" does not pass its
                // arguments by reference, a by-reference parameter triggers a
                // warning for every single chunk on PHP 8
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7292
7293
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings, an empty array if $limit is 0 or if the split fails
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split()" can return false instead of an array: answer with an
            // empty array like the "preg_split()" fallback below, instead of
            // violating the declared return type
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps the first "$limit" parts only
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // NOTE(review): this fallback quotes the pattern and therefore splits
        // on the literal text, while the "mb_split()" path above treats the
        // pattern as a regex - confirm which behavior is intended.

        if ($limit > 0) {
            // ask for one part more, the unsplit rest is dropped again below
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7354
7355
    /**
     * Check if the string starts with the given substring.
     *
     * An empty needle matches every haystack, including the empty one.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // byte-wise comparison of the first "strlen($needle)" bytes
        return $haystack !== ''
               &&
               \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7375
7376
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <p>true as soon as one entry of $substrings is a prefix of $str;
     *              false for an empty $str, an empty list or when nothing matches</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: the entries are only read, so binding them by reference is not needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7404
7405
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything behind the first separator; an empty string if $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // fast path: for UTF-8 the mb_* functions are called directly with their default encoding
        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper (as the sibling "str_substr_*_separator" methods do)
        // instead of handing the caller-supplied encoding name straight to mb_substr()
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7444
7445
    /**
     * Returns the part of the string that follows the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The text behind the last separator; an empty string if $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // every other encoding goes through the class wrappers
        if ($encoding !== 'UTF-8') {
            $separator_pos = self::strrpos($str, $separator, 0, $encoding);
            if ($separator_pos === false) {
                return '';
            }

            $start = $separator_pos + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $start, null, $encoding);
        }

        // UTF-8: plain mb_* calls with their default encoding
        $separator_pos = \mb_strrpos($str, $separator);
        if ($separator_pos === false) {
            return '';
        }

        $start = $separator_pos + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
7484
7485
    /**
     * Returns the part of the string that precedes the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The text in front of the first separator; an empty string if $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // every other encoding goes through the class wrappers
        if ($encoding !== 'UTF-8') {
            $separator_pos = self::strpos($str, $separator, 0, $encoding);
            if ($separator_pos === false) {
                return '';
            }

            return (string) self::substr($str, 0, $separator_pos, $encoding);
        }

        // UTF-8: plain mb_* calls with their default encoding
        $separator_pos = \mb_strpos($str, $separator);
        if ($separator_pos === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $separator_pos);
    }
7528
7529
    /**
     * Returns the part of the string that precedes the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The text in front of the last separator; an empty string if $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // every other encoding goes through the class wrappers
        if ($encoding !== 'UTF-8') {
            $separator_pos = self::strrpos($str, $separator, 0, $encoding);
            if ($separator_pos === false) {
                return '';
            }

            // the encoding name is normalized only for the final cut, after the search
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $separator_pos, $normalized_encoding);
        }

        // UTF-8: plain mb_* calls with their default encoding
        $separator_pos = \mb_strrpos($str, $separator);
        if ($separator_pos === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $separator_pos);
    }
7571
7572
    /**
     * Returns a part of the string relative to the first occurrence of "$needle".
     *
     * For 'UTF-8' this is the result of "mb_strstr()": the part starting at the needle,
     * or - with "$before_needle" - the part in front of it. Other encodings are
     * delegated to "UTF8::strstr()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or the needle was not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7616
7617
    /**
     * Returns a part of the string relative to the last occurrence of "$needle".
     *
     * For 'UTF-8' this is the result of "mb_strrchr()": the part starting at the last
     * needle, or - with "$before_needle" - the part in front of it. Other encodings are
     * delegated to "UTF8::strrchr()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or the needle was not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7661
7662
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to put in front of and behind $str.</p>
     *
     * @return string
     *                <p>$substring, followed by $str, followed by $substring again.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
7674
7675
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that contains neither whitespace nor
     * one of the chars from $word_define_chars.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or null.
     *                                                           Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                           tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                           ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as word
     *                                                           separators in addition to whitespace.</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first === true) {
            $str = \trim($str);
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // the plain mb_* functions are only used when neither a language nor the
        // "keep the string length" behavior was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // compare explicitly: a truthiness check would silently throw away the valid separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            // the chars end up inside a regex character class, so they must be escaped
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // words from the ignore list are returned untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // language / length aware variant: lower-case the whole word, then upper-case its first char
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
7765
7766
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * The entries of $ignore are joined into the regex alternation of "small words"
     * without any escaping, i.e. they are evaluated as regex fragments.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        // regex fragments for the words that stay lower-case inside a title
        $small_words = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // input without any lower-case char (e.g. "ALL CAPS") is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below holds the char "…" although its inline comment
        // speaks of a hyphen - confirm against the original script before touching it.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7938
7939
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one base-2 number: leading zero
     * bits are stripped and an input without any set bit (e.g. '') gives '0'.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        /** @noinspection OffsetOperationsInspection */
        $hex = (string) ($value[1] ?? '');
        if ($hex === '') {
            return '0';
        }

        // Translate every hex digit into its 4 bits. base_convert() goes through a
        // float and silently loses precision once the input is longer than a few bytes.
        $bits = '';
        foreach (\str_split($hex) as $hex_digit) {
            $bits .= \sprintf('%04b', \hexdec($hex_digit));
        }

        // keep the number-like output format: no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7958
7959
    /**
     * Splits a string into its lines.
     *
     * One or two consecutive CR / LF characters count as a single line break.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split": no entry at all or a single empty line
        $nothing = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // filtering is only needed when at least one of the two options was requested
        if ($remove_empty_values !== false || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
7997
7998
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the returned list holds
     * the words as well as the text between them.
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split": no entry at all or a single empty word
        $nothing = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothing;
        }

        // without any filter option the raw split result is returned
        if ($remove_empty_values === false && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every entry is a string, the keys stay as they are
        return \array_map(
            static function ($entry): string {
                return (string) $entry;
            },
            $reduced
        );
    }
8045
8046
    /**
     * Alias for "UTF8::to_ascii()": all three arguments are passed on unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8065
8066
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is already longer than $length, nothing of $str is
     * kept and only the substring is returned. A negative $length is handled like 0.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough: nothing is cut and nothing is appended
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // reserve room for the substring
                $length -= (int) \mb_strlen($substring);
            }

            // never pass a negative length on: mb_substr() would cut from the end of
            // the string and the result could get longer than the requested length
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, \max(0, $length)) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same clamp as in the UTF-8 branch above
        return (
               (string) self::substr(
                   $str,
                   0,
                   \max(0, $length),
                   $encoding
               )
               ) . $substring;
    }
8123
8124
    /**
     * Cuts the string down to a given length without splitting a word.
     * If $substring is provided and the string has to be cut, the string is
     * shortened further so that the substring can be appended without exceeding
     * the desired length.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit. Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // short enough: return the input untouched
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve room for the substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $cut = self::substr($str, 0, $length, $encoding);
            if ($cut === false) {
                return '';
            }

            // a space exactly at the cut position means that no word was split
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space !== $length) {
                // otherwise go back to the last space inside of the cut part
                $last_space = self::strrpos($cut, ' ', 0, $encoding);

                $shorten = $last_space !== false
                           || ($next_space !== false && $ignore_do_not_split_words_for_one_word === false);
                if ($shorten) {
                    $cut = (string) self::substr($cut, 0, (int) $last_space, $encoding);
                }
            }

            return $cut . $substring;
        }

        // short enough: return the input untouched
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve room for the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $cut - needed for PhpStan (stubs error) */
        $cut = \mb_substr($str, 0, $length);
        if ($cut === false) {
            return '';
        }

        // a space exactly at the cut position means that no word was split
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space !== $length) {
            // otherwise go back to the last space inside of the cut part
            $last_space = \mb_strrpos($cut, ' ', 0);

            $shorten = $last_space !== false
                       || ($next_space !== false && $ignore_do_not_split_words_for_one_word === false);
            if ($shorten) {
                $cut = (string) \mb_substr($cut, 0, (int) $last_space);
            }
        }

        return $cut . $substring;
    }
8218
8219
    /**
8220
     * Returns a lowercase and trimmed string separated by underscores.
8221
     * Underscores are inserted before uppercase characters (with the exception
8222
     * of the first character of the string), and in place of spaces as well as
8223
     * dashes.
8224
     *
8225
     * @param string $str
8226
     *
8227
     * @return string the underscored string
8228
     */
8229
    public static function str_underscored(string $str): string
    {
        // Thin wrapper: delegate to the generic delimiter helper with "_".
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8233
8234
    /**
8235
     * Returns an UpperCamelCase version of the supplied string. It trims
8236
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
8237
     * and underscores, and removes spaces, dashes, underscores.
8238
     *
8239
     * @param string      $str                           <p>The input string.</p>
8240
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
8241
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
8242
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
8243
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
8244
     *
8245
     * @return string string in UpperCamelCase
8246
     */
8247
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // First camelize the input ...
        $camelized = self::str_camelize($str, $encoding);

        // ... then upper-case its first character.
        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8256
8257
    /**
8258
     * alias for "UTF8::ucfirst()"
8259
     *
8260
     * @param string      $str
8261
     * @param string      $encoding
8262
     * @param bool        $clean_utf8
8263
     * @param string|null $lang
8264
     * @param bool        $try_to_keep_the_string_length
8265
     *
8266
     * @return string
8267
     *
8268
     * @see UTF8::ucfirst()
8269
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
8270
     */
8271
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: all arguments are forwarded unchanged to ucfirst().
        $result = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
8286
8287
    /**
8288
     * Get the number of words in a specific string.
8289
     *
8290
     * @param string $str       <p>The input string.</p>
8291
     * @param int    $format    [optional] <p>
8292
     *                          <strong>0</strong> => return a number of words (default)<br>
8293
     *                          <strong>1</strong> => return an array of words<br>
8294
     *                          <strong>2</strong> => return an array of words with word-offset as key
8295
     *                          </p>
8296
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
8297
     *
8298
     * @return int|string[] The number of words in the string
8299
     */
8300
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // The loops below read the words from the odd indexes of this list;
        // index 0 and the part following each word are only used for offsets.
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            $idx = 1;
            while ($idx < $parts_count) {
                $words[] = $parts[$idx];
                $idx += 2;
            }

            return $words;
        }

        // format 2: words keyed by their character offset in $str
        if ($format === 2) {
            $words = [];
            $position = (int) self::strlen($parts[0]);
            $idx = 1;
            while ($idx < $parts_count) {
                $words[$position] = $parts[$idx];
                // advance past the word itself and the part that follows it
                $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
                $idx += 2;
            }

            return $words;
        }

        // default: only the number of words
        return \intdiv($parts_count - 1, 2);
    }
8324
8325
    /**
8326
     * Case-insensitive string comparison.
8327
     *
8328
     * INFO: Case-insensitive version of UTF8::strcmp()
8329
     *
8330
     * @param string $str1     <p>The first string.</p>
8331
     * @param string $str2     <p>The second string.</p>
8332
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8333
     *
8334
     * @return int
8335
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8336
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
8337
     *             <strong>0</strong> if they are equal
8338
     */
8339
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical options, then compare case-sensitively.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8363
8364
    /**
8365
     * alias for "UTF8::strstr()"
8366
     *
8367
     * @param string $haystack
8368
     * @param string $needle
8369
     * @param bool   $before_needle
8370
     * @param string $encoding
8371
     * @param bool   $clean_utf8
8372
     *
8373
     * @return false|string
8374
     *
8375
     * @see UTF8::strstr()
8376
     * @deprecated <p>please use "UTF8::strstr()"</p>
8377
     */
8378
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to strstr().
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
8393
8394
    /**
8395
     * Case-sensitive string comparison.
8396
     *
8397
     * @param string $str1 <p>The first string.</p>
8398
     * @param string $str2 <p>The second string.</p>
8399
     *
8400
     * @return int
8401
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8402
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8403
     *             <strong>0</strong> if they are equal
8404
     */
8405
    public static function strcmp(string $str1, string $str2): int
    {
        // Byte-identical strings are equal, no normalization needed.
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the NFD (canonical decomposition) forms of both strings.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() does not return a string when it cannot
        // normalize the input (e.g. invalid UTF-8). With strict_types enabled,
        // handing that result to \strcmp() would raise a TypeError, so fall
        // back to the raw input for the operand that failed.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8416
8417
    /**
8418
     * Find length of initial segment not matching mask.
8419
     *
8420
     * @param string $str
8421
     * @param string $char_list
8422
     * @param int    $offset
8423
     * @param int    $length
8424
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8425
     *
8426
     * @return int
8427
     */
8428
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Without a mask nothing can match, so the whole string is the segment.
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Restrict the search to the requested slice of the input.
        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length === null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // Lazily capture everything in front of the first char of the mask.
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no char of the mask occurs: the segment is the whole string
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
8481
8482
    /**
8483
     * alias for "UTF8::stristr()"
8484
     *
8485
     * @param string $haystack
8486
     * @param string $needle
8487
     * @param bool   $before_needle
8488
     * @param string $encoding
8489
     * @param bool   $clean_utf8
8490
     *
8491
     * @return false|string
8492
     *
8493
     * @see UTF8::stristr()
8494
     * @deprecated <p>please use "UTF8::stristr()"</p>
8495
     */
8496
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to stristr().
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
8511
8512
    /**
8513
     * Create a UTF-8 string from code points.
8514
     *
8515
     * INFO: opposite to UTF8::codepoints()
8516
     *
8517
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
8518
     *
8519
     * @return string UTF-8 encoded string
8520
     */
8521
    public static function string(array $array): string
    {
        // Convert every code point via self::chr() ...
        $to_char = [self::class, 'chr'];
        $chars = \array_map($to_char, $array);

        // ... and glue the resulting characters together.
        return \implode('', $chars);
    }
8534
8535
    /**
8536
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
8537
     *
8538
     * @param string $str <p>The input string.</p>
8539
     *
8540
     * @return bool
8541
     *              <strong>true</strong> if the string has BOM at the start,<br>
8542
     *              <strong>false</strong> otherwise
8543
     */
8544
    public static function string_has_bom(string $str): bool
    {
        // Only the keys of self::$BOM (the BOM byte sequences) are needed here.
        // Iterating over the keys by value avoids the former by-reference loop,
        // which left an unused reference into the shared static table.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            // PHP stores numeric-looking array keys as int, hence the cast.
            if (\strpos($str, (string) $bom_string) === 0) {
                return true;
            }
        }

        return false;
    }
8555
8556
    /**
8557
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
8558
     *
8559
     * @see http://php.net/manual/en/function.strip-tags.php
8560
     *
8561
     * @param string $str            <p>
8562
     *                               The input string.
8563
     *                               </p>
8564
     * @param string $allowable_tags [optional] <p>
8565
     *                               You can use the optional second parameter to specify tags which should
8566
     *                               not be stripped.
8567
     *                               </p>
8568
     *                               <p>
8569
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
8570
     *                               can not be changed with allowable_tags.
8571
     *                               </p>
8572
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
8573
     *
8574
     * @return string
8575
     *                <p>The stripped string.</p>
8576
     */
8577
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // optionally remove invalid UTF-8 before stripping
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // only pass the allow-list on when the caller provided one
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8596
8597
    /**
8598
     * Strip all whitespace characters. This includes tabs and newline
8599
     * characters, as well as multibyte whitespace such as the thin space
8600
     * and ideographic space.
8601
     *
8602
     * @param string $str
8603
     *
8604
     * @return string
8605
     */
8606
    public static function strip_whitespace(string $str): string
    {
        // Empty input short-circuits; otherwise every run of whitespace is removed.
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8614
8615
    /**
8616
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
8617
     *
8618
     * @see http://php.net/manual/en/function.mb-stripos.php
8619
     *
8620
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
8621
     * @param string $needle     <p>The string to find in haystack.</p>
8622
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
8623
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
8624
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8625
     *
8626
     * @return false|int
8627
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
8628
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
8629
     */
8630
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            && $offset >= 0 // grapheme_stripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;

        if ($intl_usable) {
            $intl_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($intl_position !== false) {
                return $intl_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
8690
8691
    /**
8692
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
8693
     *
8694
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
8695
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
8696
     * @param bool   $before_needle [optional] <p>
8697
     *                              If <b>TRUE</b>, it returns the part of the
8698
     *                              haystack before the first occurrence of the needle (excluding the needle).
8699
     *                              </p>
8700
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
8701
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
8702
     *
8703
     * @return false|string
8704
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
8705
     */
8706
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // An empty haystack or an empty needle never matches.
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare against '' explicitly:
        // a loose "!$needle" check would also treat the valid needle "0" as
        // empty and wrongly return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The fallbacks below are not encoding-aware, so warn about other encodings.
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything before the first
        // case-insensitive occurrence of the needle
        //

        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // return the rest of the haystack, starting at the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8775
8776
    /**
8777
     * Get the string length, not the byte-length!
8778
     *
8779
     * @see http://php.net/manual/en/function.mb-strlen.php
8780
     *
8781
     * @param string $str        <p>The string being checked for length.</p>
8782
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
8783
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8784
     *
8785
     * @return false|int
8786
     *                   <p>
8787
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
8788
     *                   $encoding.
8789
     *                   (One multi-byte character counted as +1).
8790
     *                   <br>
8791
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
8792
     *                   chars.
8793
     *                   </p>
8794
     */
8795
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            && self::$SUPPORT['intl'] === true;

        if ($intl_usable) {
            $intl_length = \grapheme_strlen($str);
            if ($intl_length !== null) {
                return $intl_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single char"
        //

        \preg_match_all('/./us', $str, $chars);

        $char_count = \count($chars[0]);

        // no match at all for a non-empty string is reported as failure
        return $char_count === 0 ? false : $char_count;
    }
8895
8896
    /**
8897
     * Get string length in byte.
8898
     *
8899
     * @param string $str
8900
     *
8901
     * @return int
8902
     */
8903
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit
        // encoding; otherwise the native strlen() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
8916
8917
    /**
8918
     * Case-insensitive string comparisons using a "natural order" algorithm.
8919
     *
8920
     * INFO: natural order version of UTF8::strcasecmp()
8921
     *
8922
     * @param string $str1     <p>The first string.</p>
8923
     * @param string $str2     <p>The second string.</p>
8924
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8925
     *
8926
     * @return int
8927
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8928
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8929
     *             <strong>0</strong> if they are equal
8930
     */
8931
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands, then compare them in natural order.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8938
8939
    /**
8940
     * String comparisons using a "natural order" algorithm
8941
     *
8942
     * INFO: natural order version of UTF8::strcmp()
8943
     *
8944
     * @see http://php.net/manual/en/function.strnatcmp.php
8945
     *
8946
     * @param string $str1 <p>The first string.</p>
8947
     * @param string $str2 <p>The second string.</p>
8948
     *
8949
     * @return int
8950
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8951
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
8952
     *             <strong>0</strong> if they are equal
8953
     */
8954
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Different strings are passed through strtonatfold() before the
        // native natural-order comparison; identical strings are simply equal.
        return $str1 === $str2
            ? 0
            : \strnatcmp(
                (string) self::strtonatfold($str1),
                (string) self::strtonatfold($str2)
            );
    }
8965
8966
    /**
8967
     * Case-insensitive string comparison of the first n characters.
8968
     *
8969
     * @see http://php.net/manual/en/function.strncasecmp.php
8970
     *
8971
     * @param string $str1     <p>The first string.</p>
8972
     * @param string $str2     <p>The second string.</p>
8973
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
8974
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8975
     *
8976
     * @return int
8977
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
8978
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
8979
     *             <strong>0</strong> if they are equal
8980
     */
8981
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands, then compare their first $len characters.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8993
8994
    /**
8995
     * String comparison of the first n characters.
8996
     *
8997
     * @see http://php.net/manual/en/function.strncmp.php
8998
     *
8999
     * @param string $str1     <p>The first string.</p>
9000
     * @param string $str2     <p>The second string.</p>
9001
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9002
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9003
     *
9004
     * @return int
9005
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9006
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9007
     *             <strong>0</strong> if they are equal
9008
     */
9009
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both operands down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare the two prefixes.
        return self::strcmp($head1, $head2);
    }
9029
9030
    /**
     * Search a string for any character out of a given set.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @return false|string
     *                      the rest of the haystack, starting at the first character that is part of
     *                      char_list, or false if no such character exists (or one of the inputs is empty)
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $hit = [];
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // locate the matched character again (byte-wise) and return everything from there on
        $byte_pos = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $byte_pos);
    }
9052
9053
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, byte-wise "strpos()" (for CP850 / ASCII), intl ("grapheme_strpos()"),
     * iconv, byte-wise "strpos()" (if haystack and needle are pure ASCII) and finally
     * a manual implementation on top of self::substr() / self::strlen().
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found (or haystack / needle is empty) it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack: translate it into the
        // equivalent (non-negative) start position, so that the position found in the
        // remaining part can be mapped back onto the original haystack.
        $start = $offset;
        if ($start < 0) {
            $start += (int) self::strlen($haystack, $encoding);
            if ($start < 0) {
                $start = 0;
            }
        }

        $haystack_tmp = self::substr($haystack, $start, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte-wise search in the remaining part ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... and convert the byte position back into a character position
        if ($pos) {
            return $start + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $start;
    }
9200
9201
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the inputs
     *                   is empty), it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9230
9231
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * With mbstring the call is passed through to "mb_strrchr()". Without it, the
     * fallbacks below only look for the first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only: the native function is good enough
        //

        if (
            $before_needle === false
            &&
            \in_array($encoding, ['CP850', 'ASCII'], true)
        ) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // iconv || vanilla php: both search for the first character of the needle only
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9353
9354
    /**
     * Reverse the order of the characters in a string.
     *
     * The input is passed through self::emoji_encode() before and self::emoji_decode()
     * after the reversal. For UTF-8 the string is walked grapheme by grapheme if "intl"
     * is available, otherwise character by character.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9406
9407
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * With mbstring the call is passed through to "mb_strrichr()". Without it, the
     * fallback only looks for the first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php (searches for the first character of the needle only)
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9482
9483
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * Backends are tried in this order: mbstring, byte-wise "strripos()" (for CP850 / ASCII),
     * intl ("grapheme_strripos()"), byte-wise "strripos()" (if haystack and needle are pure
     * ASCII) and finally case-folding both strings and delegating to self::strrpos().
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $needle = ((int) $needle === $needle && $needle >= 0)
            ? (string) self::chr($needle)
            : (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only: the native function is good enough
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitive
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }
9593
9594
    /**
     * Find the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or one of the inputs is empty).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9626
9627
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * Backends are tried in this order: mbstring, byte-wise "strrpos()" (for CP850 / ASCII),
     * intl ("grapheme_strrpos()"), byte-wise "strrpos()" (if haystack and needle are pure
     * ASCII) and finally a manual implementation on top of self::substr() / self::strlen().
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the part that should be searched. The requested
        // encoding is passed on, so that slicing and counting use the same charset
        // as the rest of this method.
        $haystack_tmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last |$offset| characters, positions still count from the start
            $haystack_tmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystack_tmp !== null) {
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        // byte-wise search in the remaining part ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // ... and convert the byte position back into a character position
        return $offset + (int) self::strlen($str_tmp, $encoding);
    }
9765
9766
    /**
     * Find the byte position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the inputs is empty),
     *                   it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9798
9799
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * If $offset and / or $length is given, only that part of the string is examined.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start position of the part that is examined.</p>
     * @param int    $length   [optional] <p>Length of the part that is examined.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   <p>The length of the matching initial segment,<br>
     *                   0 if the (sliced) string or the mask is empty, or nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // only slice the input if the caller asked for a sub-range
        $needs_slice = $offset !== 0 || $length !== null;
        if ($needs_slice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // match the longest run of mask characters, anchored at the start of the string
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9842
9843
    /**
     * Find the first occurrence of a string and return the matching part of the haystack.
     *
     * @param string $haystack      <p>The string to search in. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to search for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack in front of the first
     *                              occurrence of the needle is returned (the needle itself is excluded).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The sub-string,<br>or <strong>false</strong> if the needle was not found
     *                      or if one of the inputs is an empty string
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $clean_utf8 = false
    ) {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        if ($clean_utf8 === true) {
            // invalid bytes in front of the needle would shift the positions
            // reported by the multi-byte search functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) single-byte encodings can use the native function
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl ("grapheme_strstr()" can only work with UTF-8)
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        // 4) pure ascii input can use the native function
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // 5) vanilla php: capture everything in front of the first needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        return $before_needle
            ? $found[1]
            : self::substr($haystack, (int) self::strlen($found[1]));
    }
9951
9952
    /**
     * Finds the first occurrence of a string within another, without multi-byte awareness.
     *
     * @param string $haystack      <p>
     *                              The string that is searched for the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to look for in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects which portion of haystack is returned.
     *                              If set to true, everything from the beginning of haystack
     *                              up to the first occurrence of needle is returned.
     *                              If set to false, everything from the first occurrence
     *                              of needle to the end of haystack is returned.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (or one of the inputs is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return $is_overloaded
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9991
9992
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Fold to lowercase if true, otherwise fold to uppercase.
     *                                PS: uppercase is for some languages better ...</p>
     *
     * @return string
     *                <p>The folded string, or an empty string for empty input.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ('' === $str) {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        $to_lower = $lower === true;

        // language-specific or non-UTF-8 input is delegated to the full-featured methods
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $to_lower
                ? self::strtolower($str, $encoding, false, $lang)
                : self::strtoupper($str, $encoding, false, $lang);
        }

        return $to_lower ? \mb_strtolower($str) : \mb_strtoupper($str);
    }
10044
10045
    /**
     * Convert a string to lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $str = (string) $str;

        if ('' === $str) {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
10114
10115
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10184
10185
    /**
     * Translate characters or replace sub-strings.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The characters to replace, or (if "to" is empty and this is an array) a
     *                              map of replace-pairs that is handed to the native strtr().</p>
     * @param string|string[] $to   [optional] <p>The replacement characters. If it is an empty string and "from"
     *                              is a string, every occurrence of "from" is removed from the input.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replace-pairs
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from" to the
     *                corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $from_chars = self::str_split($from);
            $to_chars = self::str_split($to);
            $count_from = \count($from_chars);
            $count_to = \count($to_chars);

            // cut the longer list, so that every "from" has exactly one "to"
            if ($count_from > $count_to) {
                $from_chars = \array_slice($from_chars, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_chars = \array_slice($to_chars, 0, $count_from);
            }

            // keep the caller's "$from" untouched until the pairs are known to be valid,
            // otherwise the exception message below would only show "false"
            $replace_pairs = \array_combine($from_chars, $to_chars);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $replace_pairs;
        }

        // "to" is empty and "from" is a plain string: remove it from the input
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10233
10234
    /**
     * Return the display width of a string.
     *
     * Without mbstring, characters matched by the wide-character pattern below
     * count as two columns and every remaining character as one.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ('' === $str) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // preferred: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // vanilla php: the pattern below needs UTF-8 input
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and remember how many were removed
        $wide_count = 0;
        $narrow_part = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        return ($wide_count * 2) + (int) self::strlen($narrow_part, 'UTF-8');
    }
10287
10288
    /**
     * Get part of a string.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str        <p>The string being checked.</p>
     * @param int    $offset     <p>The first position used in str.</p>
     * @param int    $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for empty input,
     *                      for a <i>length</i> of 0 and (in the fallback without mbstring) for an
     *                      <i>offset</i> behind the end of the string. <b>FALSE</b> is returned
     *                      if the fallback cannot determine the length of <i>str</i>.
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // hand the encoding to mbstring directly: calling self::substr() again
            // with the same arguments would end up in this branch forever
            // (a "null" length means "up to the end of the string")
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string: the offset points exactly behind the last character
        // ("$length === 0" was already handled at the top, so only "null" is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10448
10449
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * If an offset or a length is given, str1 is reduced to that part and str2 is
     * cut to the same number of characters before both are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure str1 in the same encoding that is used to cut str2,
                // otherwise both parts can end up with a different number of characters
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10502
10503
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack   <p>The string to search in.</p>
     * @param string $needle     <p>The substring to search for.</p>
     * @param int    $offset     [optional] <p>The offset where to start counting.</p>
     * @param int    $length     [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. A negative value is handed to "mb_substr()" together
     *                           with the offset before counting.
     *                           </p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number of occurrences, or <strong>false</strong> if the haystack
     *                   or the needle is an empty string (or the length of the haystack
     *                   cannot be determined)
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window first
        // ("$length === 0" returned above, so every non-null length - also a negative one - is applied)
        if ($offset || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count all matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10584
10585
    /**
     * Count the number of substring occurrences, without multi-byte awareness.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring. It outputs a warning if the offset plus the length is
     *                         greater than the haystack length.
     *                         </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string (0 if one of the inputs is empty)
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ('' === $haystack || '' === $needle) {
            return 0;
        }

        // without "mbstring.func_overload" the native function can be used as it is
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // with the overload: cut the requested window out of the haystack first
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            /** @var false|string $window - needed for PhpStan (stubs error) */
            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10663
10664
    /**
     * Returns the number of occurrences of $substring in the given string.
     *
     * The comparison is case-sensitive by default; pass false as $case_sensitive
     * to compare both strings after they were case-converted.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>0 if one of both strings is empty, the number of matches otherwise.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        $is_utf8 = $encoding === 'UTF-8';
        if ($is_utf8 === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!$case_sensitive) {
            // bring both strings into the same case before counting
            if ($is_utf8) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            } else {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }
        }

        if ($is_utf8) {
            // fast path: rely on the internal encoding of "mbstring"
            return (int) \mb_substr_count($str, $substring);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10709
10710
    /**
     * Strips $needle from the start of $haystack, ignoring the case while comparing.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to search in / nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
10734
10735
    /**
     * Get part of a string process in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string, null means "up to the end".</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, or whatever the underlying "substr()" / "mb_substr()"
     *                      call reports for an out-of-range request (this can be <b>FALSE</b> on older
     *                      PHP versions).</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            // No length given: let "substr()" run to the end of the string instead of
            // passing a hard-coded 32-bit maximum, which would cut off anything beyond
            // 2147483647 bytes on 64-bit builds.
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10766
10767
    /**
     * Strips $needle from the end of $haystack, ignoring the case while comparing.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to search in / nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // number of leading characters that survive the cut
        $keep = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep);
    }
10791
10792
    /**
     * Strips $needle from the start of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to search in / nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
10816
10817
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is a plain string
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                // NOTE(review): a missing length becomes 0 (pure insertion) for every element,
                // while the scalar path below treats null as "up to the end of the string" -
                // confirm that this difference is intended.
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: hand the requested encoding down to every element,
            // otherwise the elements would always be processed with the default encoding
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        if (\is_array($replacement) === true) {
            // a plain string can only take one replacement: use the first one
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing / oversized length
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        // splice on character level, then glue the characters together again
        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
10967
10968
    /**
     * Strips $needle from the end of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in / nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle at all?
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if ($ends_with_needle === false) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // fast path: rely on the internal encoding of "mbstring"
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
11009
11010
    /**
     * Returns a case swapped version of the string.
     *
     * The fast path combines the lower-cased, the upper-cased and the original string via XOR.
     * This only works if all three strings have the same byte length, so UTF-8 input with
     * case mappings that change the length (e.g. "ﬁ" -> "FI") is swapped character by character.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            // same byte length everywhere: for every byte the XOR cancels out the variant that
            // is equal to the original and leaves the other one
            return (string) ($lower ^ $upper ^ $str);
        }

        if ($encoding === 'UTF-8') {
            // A case mapping changed the byte length, so the byte-wise XOR would truncate the
            // result and mix up bytes of different characters: swap every character on its own.
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars !== false) {
                $swapped = '';
                foreach ($chars as $char) {
                    $char_lower = \mb_strtolower($char);
                    $swapped .= $char_lower !== $char ? $char_lower : \mb_strtoupper($char);
                }

                return $swapped;
            }
        }

        // invalid UTF-8 or another encoding: keep the byte-wise result
        return (string) ($lower ^ $upper ^ $str);
    }
11037
11038
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed if the "mbstring" / "iconv" extension is not loaded,
     * but one of its functions ("mb_strlen" / "iconv") exists anyway.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = \extension_loaded('mbstring') === false
                                  &&
                                  \function_exists('mb_strlen');

        $iconv_is_polyfilled = \extension_loaded('iconv') === false
                               &&
                               \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
11061
11062
    /**
     * Replaces every tab character with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @return string
     *                <p>The string without tabs.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
11080
11081
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without the "keep the string length" option the work is done
     * by "mb_convert_case()", everything else is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // special handling requested: let the more flexible helper do the work
        if ($lang !== null || $try_to_keep_the_string_length !== false) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
11127
11128
    /**
     * Deprecated alias, it only forwards all arguments to "UTF8::to_ascii()".
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11147
11148
    /**
     * Deprecated alias, it only forwards the input to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11162
11163
    /**
     * Deprecated alias, it only forwards the input to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11177
11178
    /**
     * Deprecated alias, it only forwards the input to "UTF8::to_utf8()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11192
11193
    /**
     * Convert a string into ASCII.
     *
     * INFO: the work is done by "ASCII::to_transliterate()"
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
11210
11211
    /**
     * Interprets a value as boolean.
     *
     * <ul>
     * <li>an empty string is <strong>false</strong></li>
     * <li>"true", "1", "on", "yes" (in any case) are <strong>true</strong></li>
     * <li>"false", "0", "off", "no" (in any case) are <strong>false</strong></li>
     * <li>other numeric strings are <strong>true</strong> if they are greater than zero</li>
     * <li>everything else is <strong>true</strong> if something truthy is left after trimming</li>
     * </ul>
     *
     * @param mixed $str <p>The value, it is cast to a string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $true_values = ['true', '1', 'on', 'yes'];
        $false_values = ['false', '0', 'off', 'no'];

        // try the exact value first, the lower-cased value afterwards
        foreach ([$str, \strtolower($str)] as $candidate) {
            if (\in_array($candidate, $true_values, true)) {
                return true;
            }

            if (\in_array($candidate, $false_values, true)) {
                return false;
            }
        }

        if (\is_numeric($str)) {
            return ((float) $str + 0) > 0;
        }

        return (bool) \trim($str);
    }
11252
11253
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * INFO: the work is done by "ASCII::to_filename()"
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @return string
     */
    public static function to_filename(string $str, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11274
11275
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), the keys are kept.
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11299
11300
    /**
     * Deprecated alias, it only forwards the input to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11314
11315
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes Unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request, html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                        <p>Any string or array.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str) === true) {
            // convert every element on its own, the keys are kept
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decode_html_entity_to_utf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $buf = '';

        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                if (($lead & "\xC0") === "\x80") {
                    // stray byte from the range 0x80-0xBF: needs conversion
                    $buf .= self::to_utf8_convert_helper($lead);
                } else {
                    // it doesn't need conversion
                    $buf .= $lead;
                }

                continue;
            }

            // how many bytes would the sequence have, if this is a UTF-8 lead byte?
            if ($lead <= "\xDF") {
                $sequence_length = 2;
            } elseif ($lead <= "\xEF") {
                $sequence_length = 3;
            } elseif ($lead <= "\xF7") {
                $sequence_length = 4;
            } else {
                // doesn't look like UTF8, but should be converted
                $sequence_length = 0;
            }

            // collect the following bytes, as long as they look like continuation bytes
            $sequence = $lead;
            $looks_like_utf8 = $sequence_length !== 0;
            for ($step = 1; $looks_like_utf8 && $step < $sequence_length; ++$step) {
                // a missing byte at the end of the string counts as "\x00"
                $trail = $pos + $step >= $byte_count ? "\x00" : $str[$pos + $step];

                if ($trail >= "\x80" && $trail <= "\xBF") {
                    $sequence .= $trail;
                } else {
                    $looks_like_utf8 = false;
                }
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already
                $buf .= $sequence;
                $pos += $sequence_length - 1;
            } else {
                // not valid UTF8 - convert only the lead byte, the rest is checked in the next rounds
                $buf .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single escape sequence
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $code_point = ((int) \hexdec($matches[1]) << 10)
                                  + (int) \hexdec($matches[2])
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // 0x80-0x9F: build the two byte sequence by hand

                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $buf
        );

        if ($buf === null) {
            // the regex engine failed
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
11448
11449
    /**
11450
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
11451
     *
11452
     * INFO: This is slower than "trim()"
11453
     *
11454
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
11455
     * but the check for ASCII (7-Bit) costs more time than we can save here.
11456
     *
11457
     * @param string      $str   <p>The string to be trimmed</p>
11458
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
11459
     *
11460
     * @return string the trimmed string
11461
     */
11462
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Strict check on purpose: a plain truthiness test would treat the
        // char list "0" as "not given" and silently strip whitespace instead.
        // An empty list is also treated as "not given", because "[]+" would
        // not be a valid character class.
        $has_char_list = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_char_list) {
                // mb_ereg_replace() takes the pattern without delimiters,
                // so no delimiter is passed to preg_quote() here.
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // The quoted chars are used as a character class: any run of
                // them at the start or at the end of the string is removed.
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback path: the pattern is handed to regex_replace() together
        // with "/" as delimiter, so "/" has to be escaped as well.
        if ($has_char_list) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11490
11491
    /**
11492
     * Makes string's first char uppercase.
11493
     *
11494
     * @param string      $str                           <p>The input string.</p>
11495
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11496
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11497
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11498
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
11499
     *
11500
     * @return string the resulting string
11501
     */
11502
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // Without a language hint and without the "keep length" option the
        // plain mb_strtoupper() is used; otherwise self::strtoupper() gets
        // both options passed through.
        $plain_mb_uppercase = $lang === null && $try_to_keep_the_string_length === false;

        if ($encoding === 'UTF-8') {
            // Fast path: the mb_* calls are made without an explicit encoding.
            $tail = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            if ($plain_mb_uppercase) {
                $head = \mb_strtoupper($first_char);
            } else {
                $head = self::strtoupper(
                    $first_char,
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            }

            return $head . $tail;
        }

        // Any other encoding is normalized first and then passed explicitly.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_uppercase) {
            $head = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $head = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $head . $tail;
    }
11560
11561
    /**
11562
     * alias for "UTF8::ucfirst()"
11563
     *
11564
     * @param string $str
11565
     * @param string $encoding
11566
     * @param bool   $clean_utf8
11567
     *
11568
     * @return string
11569
     *
11570
     * @see UTF8::ucfirst()
11571
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
11572
     */
11573
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Deprecated alias: all arguments are forwarded unchanged to ucfirst().
        $first_char_uppercased = self::ucfirst($str, $encoding, $clean_utf8);

        return $first_char_uppercased;
    }
11580
11581
    /**
11582
     * Uppercase for all words in the string.
11583
     *
11584
     * @param string   $str        <p>The input string.</p>
11585
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
11586
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
11587
     *                             word.</p>
11588
     * @param string   $encoding   [optional] <p>Set the charset.</p>
11589
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11590
     *
11591
     * @return string
11592
     */
11593
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Strict comparison on purpose: a loose "!$str" test would also treat
        // the valid input "0" as empty and return '' for it.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE does not only uppercase the first letter, it also lowercases all other letters

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The native ucwords() is only an option when neither extra word
        // chars nor exceptions were given ...
        $use_php_default_functions = !(bool) ($char_list . \implode('', $exceptions));

        // ... and when the input is pure ASCII.
        if (
            $use_php_default_functions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        foreach ($words as &$word) {
            // Falsy pieces (empty strings and the literal "0") stay untouched.
            if (!$word) {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // Drop the reference so $word can not alias the last element any longer.
        unset($word);

        // The pieces are glued back together without any separator.
        return \implode('', $words);
    }
11642
11643
    /**
11644
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
11645
     *
11646
     * e.g:
11647
     * 'test+test'                     => 'test test'
11648
     * 'D&#252;sseldorf'               => 'Düsseldorf'
11649
     * 'D%FCsseldorf'                  => 'Düsseldorf'
11650
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
11651
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
11652
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
11653
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
11654
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
11655
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
11656
     *
11657
     * @param string $str          <p>The input string.</p>
11658
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
11659
     *
11660
     * @return string
11661
     */
11662
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Cheap pre-check: without any of these markers there is nothing to
        // decode and only the simple UTF-8 fix is applied.
        $needs_decoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needs_decoding = true;

                break;
            }
        }

        if ($needs_decoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Decode (entities first, then url-encoding) until the string no
        // longer changes, or only once if $multi_decode is disabled.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($entities_decoded));
        } while ($multi_decode === true && $before_pass !== $str);

        return $str;
    }
11700
11701
    /**
11702
     * Returns an array with "urlencoded"-win1252 -> UTF-8
11703
     *
11704
     * @return string[]
11705
     *
11706
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
11707
     */
11708
    public static function urldecode_fix_win1252_chars(): array
    {
        // Static lookup table: percent-encoded single byte (0x20 - 0xFF) => character.
        //
        // '%80' maps to the euro sign, which is what byte 0x80 is in
        // Windows-1252 (it used to map to a backtick, duplicating '%60').
        //
        // NOTE(review): the values for %7F, %81, %8D, %8F, %90, %9D, %A0 and
        // %AD are empty literals in the reviewed copy and are kept that way;
        // confirm that no invisible characters were lost there.
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
11937
11938
    /**
11939
     * Decodes a UTF-8 string to ISO-8859-1.
11940
     *
11941
     * @param string $str             <p>The input string.</p>
11942
     * @param bool   $keep_utf8_chars
11943
     *
11944
     * @return string
11945
     */
11946
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // Lazy-load the lookup tables; below, $ORD is indexed by 1-byte
        // strings and $CHR by integers < 256 (presumably ord()/chr()
        // equivalents - confirm against getData()).
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // The string is rewritten in place: $i is the read offset, $j the
        // write offset. $j never gets ahead of $i, so no unread byte is
        // overwritten.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // The high nibble of the current byte selects the sequence length.
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence. If the input ends right after the lead
                    // byte there is no second byte to read: emit the
                    // placeholder instead of reading past the end of the string.
                    if ($i + 1 >= $len) {
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    // Only code points below 256 are written as a single byte.
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case.
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (and 4-byte) sequences are replaced by the placeholder.
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // Every other byte is copied as it is.
                    $str[$j] = $str[$i];
            }
        }

        // Cut off the leftover bytes behind the write offset.
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        // With $keep_utf8_chars the untouched input is handed back whenever
        // self::strlen() reports the decoded result to be at least as long
        // as the input.
        if (
            $keep_utf8_chars === true
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12008
12009
    /**
12010
     * Encodes an ISO-8859-1 string to UTF-8.
12011
     *
12012
     * @param string $str <p>The input string.</p>
12013
     *
12014
     * @return string
12015
     */
12016
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Exactly one ISO-8859-1 -> UTF-8 conversion: running it twice would
        // double-encode every non-ASCII byte. mb_convert_encoding() is used
        // because utf8_encode() is deprecated as of PHP 8.2.
        /** @var false|string $encoded - a polyfill may return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
12033
12034
    /**
12035
     * fix -> utf8-win1252 chars
12036
     *
12037
     * @param string $str <p>The input string.</p>
12038
     *
12039
     * @return string
12040
     *
12041
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
12042
     */
12043
    public static function utf8_fix_win1252_chars(string $str): string
    {
        // Deprecated alias: the work is done by fix_simple_utf8().
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12047
12048
    /**
12049
     * Returns an array with all utf8 whitespace characters.
12050
     *
12051
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
12052
     *
12053
     * @return string[]
12054
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
12055
     *                  as defined in above URL
12056
     */
12057
    public static function whitespace_table(): array
    {
        // Plain accessor for the static whitespace table of this class.
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12061
12062
    /**
12063
     * Limit the number of words in a string.
12064
     *
12065
     * @param string $str        <p>The input string.</p>
12066
     * @param int    $limit      <p>The limit of words as integer.</p>
12067
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
12068
     *
12069
     * @return string
12070
     */
12071
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // Optional leading whitespace, followed by at most $limit groups of
        // "non-whitespace run + trailing whitespace".
        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        // Nothing matched (e.g. whitespace-only input), or the match - which
        // is anchored at the start - already covers the whole string:
        // there is nothing to cut off.
        if (
            !isset($matches[0])
            ||
            $matches[0] === $str
        ) {
            return $str;
        }

        // Strip the trailing whitespace with the same Unicode-aware "\s" the
        // match was made with; the byte-wise rtrim() would leave e.g. an
        // EM SPACE in front of the add-on.
        $head = \preg_replace('/\\s++\\z/u', '', $matches[0]);
        if ($head === null) {
            // the regex engine reported an error: fall back to the ASCII trim
            $head = \rtrim($matches[0]);
        }

        return $head . $str_add_on;
    }
12092
12093
    /**
12094
     * Wraps a string to a given number of characters
12095
     *
12096
     * @see http://php.net/manual/en/function.wordwrap.php
12097
     *
12098
     * @param string $str   <p>The input string.</p>
12099
     * @param int    $width [optional] <p>The column width.</p>
12100
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
12101
     * @param bool   $cut   [optional] <p>
12102
     *                      If the cut is set to true, the string is
12103
     *                      always wrapped at or before the specified width. So if you have
12104
     *                      a word that is larger than the given width, it is broken apart.
12105
     *                      </p>
12106
     *
12107
     * @return string
12108
     *                <p>The given string wrapped at the specified column.</p>
12109
     */
12110
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // Build two parallel views of the input:
        // - $chars: the real characters (each existing $break is one element)
        // - $mask:  one ASCII placeholder per element ("#" = break,
        //           " " = space, "?" = anything else), so that the native
        //           wordwrap() can do the line breaking on it.
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_index = -1;
        $break_pos = -1;

        // Walk over every "#" of the wrapped mask and copy the real
        // characters in front of it into the result.
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                $wrapped .= $chars[$char_index];
                unset($chars[$char_index++]);

                // prevent endless loop, e.g. if there is an error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // An original break or a space at the break position is
            // swallowed; it is replaced by the break appended below.
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is an error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // Whatever was not consumed belongs to the last line.
        return $wrapped . \implode('', $chars);
    }
12179
12180
    /**
12181
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
12182
     *    ... so that we wrap per line.
12183
     *
12184
     * @param string      $str             <p>The input string.</p>
12185
     * @param int         $width           [optional] <p>The column width.</p>
12186
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
12187
     * @param bool        $cut             [optional] <p>
12188
     *                                     If the cut is set to true, the string is
12189
     *                                     always wrapped at or before the specified width. So if you have
12190
     *                                     a word that is larger than the given width, it is broken apart.
12191
     *                                     </p>
12192
     * @param bool        $add_final_break [optional] <p>
12193
     *                                     If this flag is true, then the method will add a $break at the end
12194
     *                                     of the result string.
12195
     *                                     </p>
12196
     * @param string|null $delimiter       [optional] <p>
12197
     *                                     You can change the default behavior, where we split the string by newline.
12198
     *                                     </p>
12199
     *
12200
     * @return string
12201
     */
12202
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // Without an explicit delimiter the input is split on any newline style.
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        // Every line is wrapped on its own; a failed split yields no lines.
        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        // The lines are joined with the given delimiter, or with "\n" if
        // none was given.
        $glue = $delimiter ?? "\n";
        $tail = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $tail;
    }
12231
12232
    /**
12233
     * Returns an array of Unicode White Space characters.
12234
     *
12235
     * @return string[] an array with numeric code point as key and White Space Character as value
12236
     */
12237
    public static function ws(): array
    {
        // Plain accessor for the static white space list of this class.
        $white_space_chars = self::$WHITESPACE;

        return $white_space_chars;
    }
12241
12242
    /**
12243
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
12244
     *
12245
     * @see http://hsivonen.iki.fi/php-utf8/
12246
     *
12247
     * @param string $str    <p>The string to be checked.</p>
12248
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
12249
     *
12250
     * @return bool
12251
     *
12252
     * @noinspection ReturnTypeCanBeDeclaredInspection
12253
     */
12254
    private static function is_utf8_string(string $str, bool $strict = false)
12255
    {
12256 108
        if ($str === '') {
12257 14
            return true;
12258
        }
12259
12260 102
        if ($strict === true) {
12261 2
            $is_binary = self::is_binary($str, true);
12262
12263 2
            if ($is_binary && self::is_utf16($str, false) !== false) {
12264 2
                return false;
12265
            }
12266
12267
            if ($is_binary && self::is_utf32($str, false) !== false) {
12268
                return false;
12269
            }
12270
        }
12271
12272 102
        if (self::pcre_utf8_support() !== true) {
12273
            // If even just the first character can be matched, when the /u
12274
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
12275
            // invalid, nothing at all will match, even if the string contains
12276
            // some valid sequences
12277
            return \preg_match('/^./us', $str, $ar) === 1;
12278
        }
12279
12280 102
        $mState = 0; // cached expected number of octets after the current octet
12281
        // until the beginning of the next UTF8 character sequence
12282 102
        $mUcs4 = 0; // cached Unicode character
12283 102
        $mBytes = 1; // cached expected number of octets in the current sequence
12284
12285 102
        if (self::$ORD === null) {
12286
            self::$ORD = self::getData('ord');
12287
        }
12288
12289 102
        $len = \strlen($str);
12290
        /** @noinspection ForeachInvariantsInspection */
12291 102
        for ($i = 0; $i < $len; ++$i) {
12292 102
            $in = self::$ORD[$str[$i]];
12293
12294 102
            if ($mState === 0) {
12295
                // When mState is zero we expect either a US-ASCII character or a
12296
                // multi-octet sequence.
12297 102
                if ((0x80 & $in) === 0) {
12298
                    // US-ASCII, pass straight through.
12299 97
                    $mBytes = 1;
12300 83
                } elseif ((0xE0 & $in) === 0xC0) {
12301
                    // First octet of 2 octet sequence.
12302 73
                    $mUcs4 = $in;
12303 73
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
12304 73
                    $mState = 1;
12305 73
                    $mBytes = 2;
12306 58
                } elseif ((0xF0 & $in) === 0xE0) {
12307
                    // First octet of 3 octet sequence.
12308 42
                    $mUcs4 = $in;
12309 42
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
12310 42
                    $mState = 2;
12311 42
                    $mBytes = 3;
12312 29
                } elseif ((0xF8 & $in) === 0xF0) {
12313
                    // First octet of 4 octet sequence.
12314 18
                    $mUcs4 = $in;
12315 18
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
12316 18
                    $mState = 3;
12317 18
                    $mBytes = 4;
12318 13
                } elseif ((0xFC & $in) === 0xF8) {
12319
                    /* First octet of 5 octet sequence.
12320
                     *
12321
                     * This is illegal because the encoded codepoint must be either
12322
                     * (a) not the shortest form or
12323
                     * (b) outside the Unicode range of 0-0x10FFFF.
12324
                     * Rather than trying to resynchronize, we will carry on until the end
12325
                     * of the sequence and let the later error handling code catch it.
12326
                     */
12327 5
                    $mUcs4 = $in;
12328 5
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
12329 5
                    $mState = 4;
12330 5
                    $mBytes = 5;
12331 10
                } elseif ((0xFE & $in) === 0xFC) {
12332
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
12333 5
                    $mUcs4 = $in;
12334 5
                    $mUcs4 = ($mUcs4 & 1) << 30;
12335 5
                    $mState = 5;
12336 5
                    $mBytes = 6;
12337
                } else {
12338
                    // Current octet is neither in the US-ASCII range nor a legal first
12339
                    // octet of a multi-octet sequence.
12340 102
                    return false;
12341
                }
12342 83
            } elseif ((0xC0 & $in) === 0x80) {
12343
12344
                // When mState is non-zero, we expect a continuation of the multi-octet
12345
                // sequence
12346
12347
                // Legal continuation.
12348 75
                $shift = ($mState - 1) * 6;
12349 75
                $tmp = $in;
12350 75
                $tmp = ($tmp & 0x0000003F) << $shift;
12351 75
                $mUcs4 |= $tmp;
12352
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
12353
                // Unicode code point to be output.
12354 75
                if (--$mState === 0) {
12355
                    // Check for illegal sequences and code points.
12356
                    //
12357
                    // From Unicode 3.1, non-shortest form is illegal
12358
                    if (
12359 75
                        ($mBytes === 2 && $mUcs4 < 0x0080)
12360
                        ||
12361 75
                        ($mBytes === 3 && $mUcs4 < 0x0800)
12362
                        ||
12363 75
                        ($mBytes === 4 && $mUcs4 < 0x10000)
12364
                        ||
12365 75
                        ($mBytes > 4)
12366
                        ||
12367
                        // From Unicode 3.2, surrogate characters are illegal.
12368 75
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
12369
                        ||
12370
                        // Code points outside the Unicode range are illegal.
12371 75
                        ($mUcs4 > 0x10FFFF)
12372
                    ) {
12373 8
                        return false;
12374
                    }
12375
                    // initialize UTF8 cache
12376 75
                    $mState = 0;
12377 75
                    $mUcs4 = 0;
12378 75
                    $mBytes = 1;
12379
                }
12380
            } else {
12381
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
12382
                // Incomplete multi-octet sequence.
12383 35
                return false;
12384
            }
12385
        }
12386
12387 67
        return true;
12388
    }
12389
12390
    /**
     * Convert the case of a string by running it through the class's
     * case-folding replacement tables.
     *
     * The "upper" / "lower" lists from self::$COMMON_CASE_FOLD are always applied;
     * the "caseFolding_full" data set is applied afterwards when requested.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        $use_lowercase = false,
        $use_full_case_fold = false
    ) {
        // loaded on first use and then kept for every later call
        static $FULL_CASE_FOLD = null;

        // only a strict boolean true selects the "to lowercase" direction
        $to_lower = $use_lowercase === true;

        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $to_lower
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 when lowercasing, and the other way round otherwise
        if ($to_lower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12436
12437
    /**
     * Load a data set from "/data/*.php" (relative to this file) and hand back
     * whatever that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
12453
12454
    /**
     * Build the emoji lookup caches on first use.
     *
     * Fills self::$EMOJI_KEYS_CACHE, self::$EMOJI_VALUES_CACHE and
     * self::$EMOJI_KEYS_REVERSIBLE_CACHE from the "emoji" data set.
     *
     * @return true|null <p>true when the caches were built by this call, null when they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the emoji map so that keys with more bytes come first
        $longest_key_first = static function (string $a, string $b): int {
            return \strlen($b) <=> \strlen($a);
        };
        \uksort(self::$EMOJI, $longest_key_first);

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder token per key: the key's crc32 followed by the reversed crc32 digits
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12484
12485
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool <p>true only when MB_OVERLOAD_STRING exists and its bit is set in "mbstring.func_overload".</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing to test the ini value against
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
12504
12505
    /**
     * Filter a list of strings, dropping short and/or blank entries.
     *
     * The result is a freshly indexed list; the keys of the input are not kept.
     *
     * @param array    $strings
     * @param bool     $remove_empty_values <p>Drop entries that are empty after trim().</p>
     * @param int|null $remove_short_values <p>Drop entries whose mb_strlen() is less than or equal to
     *                                      this value; null disables the length filter.</p>
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: the entries are only read, never modified
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12544
12545
    /**
     * rxClass
     *
     * Build a regular-expression fragment from the pieces of $s (as split by
     * self::str_split()). The pieces are collected into a bracketed character
     * class that starts out as $class; a piece that cannot go into the class
     * becomes a separate alternative. Results are memoized per ($s, $class).
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the character class.</p>
     *
     * @return string <p>Either "[...]" alone or "(?:[...]|alt|...)" when extra alternatives exist.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        static $RX_CLASS_CACHE = [];

        // cache per argument pair; a plain "$s . $class" key would mix up
        // e.g. ('ab', 'c') and ('a', 'bc')
        if (isset($RX_CLASS_CACHE[$s][$class])) {
            return $RX_CLASS_CACHE[$s][$class];
        }

        $class_array = [$class];

        // iterate by value with a dedicated variable: the split result is a
        // temporary and the parameter $s is still needed for the cache below
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal hyphen goes to the front of the character class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // fewer than three bytes: quote it and add it to the class
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                // anything else is matched as its own alternative
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$s][$class] = $return;

        return $return;
    }
12595
12596
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter, upper-cases the first character of every part
     * via self::ucfirst() and joins the parts again. Parts that equal one of the
     * listed name particles, or that start with one of the listed prefixes, are
     * left untouched.
     *
     * @param string $names
     * @param string $delimiter <p>Separator between the name parts; an empty delimiter yields ''.</p>
     * @param string $encoding  <p>Passed to self::strpos() for the prefix checks.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split on an empty delimiter (warning + false before
        // PHP 8, ValueError since PHP 8), so return the empty result directly
        if ($delimiter === '') {
            return '';
        }

        // init
        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // with a hyphen as delimiter the name particles also count as prefixes
        $protected_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
        }

        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $is_protected = false;
            foreach ($protected_beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $is_protected = true;

                    break;
                }
            }

            if ($is_protected) {
                continue;
            }

            // NOTE(review): $encoding is not forwarded to ucfirst() here - confirm that this is intended
            $name = self::ucfirst($name);
        }
        // drop the reference so that $name cannot alias the last element any longer
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
12690
12691
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * Decomposes the string (Unicode NFD) and strips every run of non-spacing
     * marks (\p{Mn}), so that e.g. an accented letter is reduced to its base letter.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The folded string, '' when the input cannot be normalized,
     *                     or null when preg_replace() fails.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure with false; passing that on to preg_replace()
        // is a TypeError under strict_types on PHP 8, while PHP 7 treated it as ''
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace(
            '/\p{Mn}+/u',
            '',
            $decomposed
        );
    }
12707
12708
    /**
     * Convert one input byte to its UTF-8 representation.
     *
     * Bytes listed in the "win1252_to_utf8" table are mapped through that table;
     * every other byte is expanded into a two-byte sequence (lead byte 0xC0 | high
     * bits, continuation byte 0x80 | low six bits).
     *
     * @param int|string $input <p>Key into the "ord" table - presumably a single byte; verify against the callers.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The "|" and "&" below are deliberately bitwise: applied to two
            // strings they combine the operands byte by byte.
            //
            // ">> 6" is the integer form of "/ 64"; a fractional float must not be
            // used as array key (deprecated implicit conversion since PHP 8.1).
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            $cc2 = (((string) $input) & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12744
12745
    /**
     * Rewrite "%uXXXX" escapes (3 or 4 hex digits) as hexadecimal HTML
     * entities of the form "&#xXXXX;". Strings without such an escape are
     * returned unchanged.
     *
     * @param string $str
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to convert (or the match failed): hand the input back untouched
        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
12761
}
12762