Passed
Push — master ( 713c72...ecb35d )
by Lars
03:55
created

UTF8::to_filename()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 1
eloc 4
c 3
b 0
f 0
nc 1
nop 3
dl 0
loc 9
ccs 4
cts 4
cp 1
crap 1
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    /**
10
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
11
     * This regular expression is a work around for http://bugs.exim.org/1279
12
     */
13
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
14
15
    /**
     * Bom => Byte-Length
     *
     * Every BOM is listed twice: once as raw bytes and once in the form it takes
     * after the raw bytes were mis-read as "WINDOWS-1252" and re-encoded as UTF-8.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // written with byte escapes so that tools which strip BOM-like text cannot empty this key
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
34
35
    /**
36
     * Numeric code point => UTF-8 Character
37
     *
38
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
39
     *
40
     * @var array<int, string>
41
     */
42
    private static $WHITESPACE = [
43
        // NUL Byte
44
        0 => "\x0",
45
        // Tab
46
        9 => "\x9",
47
        // New Line
48
        10 => "\xa",
49
        // Vertical Tab
50
        11 => "\xb",
51
        // Carriage Return
52
        13 => "\xd",
53
        // Ordinary Space
54
        32 => "\x20",
55
        // NO-BREAK SPACE
56
        160 => "\xc2\xa0",
57
        // OGHAM SPACE MARK
58
        5760 => "\xe1\x9a\x80",
59
        // MONGOLIAN VOWEL SEPARATOR
60
        6158 => "\xe1\xa0\x8e",
61
        // EN QUAD
62
        8192 => "\xe2\x80\x80",
63
        // EM QUAD
64
        8193 => "\xe2\x80\x81",
65
        // EN SPACE
66
        8194 => "\xe2\x80\x82",
67
        // EM SPACE
68
        8195 => "\xe2\x80\x83",
69
        // THREE-PER-EM SPACE
70
        8196 => "\xe2\x80\x84",
71
        // FOUR-PER-EM SPACE
72
        8197 => "\xe2\x80\x85",
73
        // SIX-PER-EM SPACE
74
        8198 => "\xe2\x80\x86",
75
        // FIGURE SPACE
76
        8199 => "\xe2\x80\x87",
77
        // PUNCTUATION SPACE
78
        8200 => "\xe2\x80\x88",
79
        // THIN SPACE
80
        8201 => "\xe2\x80\x89",
81
        //HAIR SPACE
82
        8202 => "\xe2\x80\x8a",
83
        // LINE SEPARATOR
84
        8232 => "\xe2\x80\xa8",
85
        // PARAGRAPH SEPARATOR
86
        8233 => "\xe2\x80\xa9",
87
        // NARROW NO-BREAK SPACE
88
        8239 => "\xe2\x80\xaf",
89
        // MEDIUM MATHEMATICAL SPACE
90
        8287 => "\xe2\x81\x9f",
91
        // HALFWIDTH HANGUL FILLER
92
        65440 => "\xef\xbe\xa0",
93
        // IDEOGRAPHIC SPACE
94
        12288 => "\xe3\x80\x80",
95
    ];
96
97
    /**
98
     * @var array<string, string>
99
     */
100
    private static $WHITESPACE_TABLE = [
101
        'SPACE'                     => "\x20",
102
        'NO-BREAK SPACE'            => "\xc2\xa0",
103
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
104
        'EN QUAD'                   => "\xe2\x80\x80",
105
        'EM QUAD'                   => "\xe2\x80\x81",
106
        'EN SPACE'                  => "\xe2\x80\x82",
107
        'EM SPACE'                  => "\xe2\x80\x83",
108
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
109
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
110
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
111
        'FIGURE SPACE'              => "\xe2\x80\x87",
112
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
113
        'THIN SPACE'                => "\xe2\x80\x89",
114
        'HAIR SPACE'                => "\xe2\x80\x8a",
115
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
116
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
117
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
118
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
119
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
120
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
121
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
122
    ];
123
124
    /**
125
     * @var array{upper: string[], lower: string[]}
126
     */
127
    private static $COMMON_CASE_FOLD = [
128
        'upper' => [
129
            'µ',
130
            'ſ',
131
            "\xCD\x85",
132
            'ς',
133
            'ẞ',
134
            "\xCF\x90",
135
            "\xCF\x91",
136
            "\xCF\x95",
137
            "\xCF\x96",
138
            "\xCF\xB0",
139
            "\xCF\xB1",
140
            "\xCF\xB5",
141
            "\xE1\xBA\x9B",
142
            "\xE1\xBE\xBE",
143
        ],
144
        'lower' => [
145
            'μ',
146
            's',
147
            'ι',
148
            'σ',
149
            'ß',
150
            'β',
151
            'θ',
152
            'φ',
153
            'π',
154
            'κ',
155
            'ρ',
156
            'ε',
157
            "\xE1\xB9\xA1",
158
            'ι',
159
        ],
160
    ];
161
162
    /**
163
     * @var array<string, mixed>
164
     */
165
    private static $SUPPORT = [];
166
167
    /**
168
     * @var array<string, string>|null
169
     */
170
    private static $BROKEN_UTF8_FIX;
171
172
    /**
173
     * @var array<int, string>|null
174
     */
175
    private static $WIN1252_TO_UTF8;
176
177
    /**
178
     * @var array<int, string>|null
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<int at position 2 could not be parsed: Expected '>' at position 2, but found 'int'.
Loading history...
179
     */
180
    private static $INTL_TRANSLITERATOR_LIST;
181
182
    /**
183
     * @var array<string>|null
184
     */
185
    private static $ENCODINGS;
186
187
    /**
188
     * @var array<string, int>|null
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<string at position 2 could not be parsed: Expected '>' at position 2, but found 'string'.
Loading history...
189
     */
190
    private static $ORD;
191
192
    /**
193
     * @var array<string, string>|null
194
     */
195
    private static $EMOJI;
196
197
    /**
198
     * @var array<string>|null
199
     */
200
    private static $EMOJI_VALUES_CACHE;
201
202
    /**
203
     * @var array<string>|null
204
     */
205
    private static $EMOJI_KEYS_CACHE;
206
207
    /**
208
     * @var array<string>|null
209
     */
210
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
211
212
    /**
213
     * @var array<int, string>|null
214
     */
215
    private static $CHR;
216
217
    /**
218
     * __construct()
219
     */
220 33
    public function __construct()
221
    {
222 33
    }
223
224
    /**
     * Fetch one character by its position, i.e. a multi-byte aware variant of $str[1].
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character, or '' for an empty string or a negative position
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $has_valid_input = $str !== '' && $pos >= 0;
        if (!$has_valid_input) {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other charset through the class helper
        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
245
246
    /**
     * Make sure the string starts with the UTF-8 BOM.
     *
     * INFO: A string for which UTF8::string_has_bom() does not report false is returned untouched.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
263
264
    /**
     * Changes the case of all keys in an array.
     *
     * The values are copied over unchanged. If two keys become identical after
     * the case conversion, the later element overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default)</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return mixed[]
     *                 <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // any value other than the two known flags falls back to lower-casing
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: the elements are only read, so no reference is needed
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
299
300
    /**
     * Extract the text enclosed by the first $start delimiter (searched from
     * $offset) and the next $end delimiter after it.
     *
     * An empty string is returned if either delimiter is missing or if nothing
     * lies between the two delimiters.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: same steps as below, but via the encoding-aware class helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $found_at = self::strpos($str, $start, $offset, $encoding);
            if ($found_at === false) {
                return '';
            }

            $from = $found_at + (int) self::strlen($start, $encoding);
            $to = self::strpos($str, $end, $from, $encoding);
            if ($to === false || $to === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $to - $from, $encoding);
        }

        // UTF-8 path: call mbstring directly
        $found_at = \mb_strpos($str, $start, $offset);
        if ($found_at === false) {
            return '';
        }

        // first character behind the start delimiter
        $from = $found_at + (int) \mb_strlen($start);
        $to = \mb_strpos($str, $end, $from);
        if ($to === false || $to === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $to - $from);
    }
363
364
    /**
     * Convert a string of binary digits into the bytes it represents.
     *
     * The digits are converted to hexadecimal via base_convert() and then packed
     * with pack('H*'). Input without a first element, and input whose converted
     * value is zero, yields an empty string.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);

        return $hex === '0' ? '' : \pack('H*', $hex);
    }
384
385
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark (the three bytes EF BB BF)
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
396
397
    /**
     * @alias of UTF8::chr_map()
     *
     * Forwards both arguments unchanged to UTF8::chr_map() and returns its result.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
411
412
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        // UTF-8 is handled by mbstring directly
        return (string) \mb_substr($str, $index, 1);
    }
429
430
    /**
     * Returns an array consisting of the characters in the string.
     *
     * The split itself is delegated to UTF8::str_split() called with only the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
441
442
    /**
     * Probe the server environment once and record the result in self::$SUPPORT.
     *
     * The first call fills the support table (mbstring, iconv, intl, IntlChar,
     * ctype, finfo, json, PCRE-UTF-8, symfony polyfill) and switches the mbstring
     * internal encoding to UTF-8 where mbstring or the polyfill is available.
     * Later calls do nothing.
     *
     * @return true|null true if the checks were run by this call, null if they had already been run
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides "mb_internal_encoding" as well, so set it there too
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
497
498
    /**
     * Generates a character from the given code point, encoded as UTF-8 or as $encoding.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per "code point + encoding" in a function-static cache.
     * Code points up to 127 are read from the "chr" data table, larger ones are
     * built with IntlChar when available and otherwise assembled byte by byte.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memoized results, shared between all calls
        static $cache = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $key = $code_point . $encoding;
        if (isset($cache[$key]) === true) {
            return $cache[$key];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $char = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $char = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 byte sequence by hand
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            $cp = (int) $code_point;

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            if ($cp <= 0x7F) {
                // 0xxxxxxx
                $char = self::$CHR[$cp];
            } elseif ($cp <= 0x7FF) {
                // 110xxxxx 10xxxxxx
                $char = self::$CHR[($cp >> 6) + 0xC0]
                    . self::$CHR[($cp & 0x3F) + 0x80];
            } elseif ($cp <= 0xFFFF) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                $char = self::$CHR[($cp >> 12) + 0xE0]
                    . self::$CHR[(($cp >> 6) & 0x3F) + 0x80]
                    . self::$CHR[($cp & 0x3F) + 0x80];
            } else {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $char = self::$CHR[($cp >> 18) + 0xF0]
                    . self::$CHR[(($cp >> 12) & 0x3F) + 0x80]
                    . self::$CHR[(($cp >> 6) & 0x3F) + 0x80]
                    . self::$CHR[($cp & 0x3F) + 0x80];
            }
        }

        // all three strategies produce UTF-8; convert only if something else was requested
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        return $cache[$key] = $char;
    }
610
611
    /**
     * Applies callback to all characters of a string.
     *
     * The string is split with UTF8::str_split() and the pieces are passed to array_map().
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
627
628
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character, empty for an empty string
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byte_length = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, self::str_split($str));
    }
658
659
    /**
     * Get a decimal code representation of a specific character.
     *
     * With iconv available the character is converted to UCS-4LE and unpacked as
     * a 32-bit integer. Otherwise the code point is decoded by hand from the lead
     * byte and its continuation bytes.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the code point, 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first byte: unpack() would fail on it and
        // $char[0] would be an invalid offset, so answer it up front.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                $unpacked = \unpack('V', $chr_tmp);
                // fall through to the manual decoder if fewer than 4 bytes came back
                if ($unpacked !== false) {
                    /** @noinspection OffsetOperationsInspection */
                    return $unpacked[1];
                }
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells how many bytes belong to the sequence; strip its marker bits
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the 6 payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
705
706
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty string is returned as-is. The literal "&#0;" is passed to
     * UTF8::ord() as an empty string.
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $subject = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($subject), $prefix);
    }
726
727
    /**
     * alias for "UTF8::chr_to_decimal()"
     *
     * The argument is forwarded unchanged and the result is returned as-is.
     *
     * @param string $chr
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
741
742
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending.
     *
     * The chunks come from UTF8::str_split() and are glued together with implode(),
     * so $end is placed between chunks.
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
755
756
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in this order: diamond question mark, invisible
     * characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                                        e.g.: "…"
     *                                                        => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                                        combination with
     *                                                        $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond question
     *                                                        mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove invisible
     *                                                        characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove invisible
     *                                                        url encoded characters e.g.: "%0B"<br>
     *                                                        WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                                        </p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings
        //
        // Only capture group 1 (well-formed sequences) survives the replacement,
        // bytes matched by the other two groups are dropped.
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str, '');
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
829
830
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
     *
     * The input is cast to string, run through UTF8::fix_simple_utf8() and then
     * through UTF8::clean() with a fixed set of options (see the call below).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // do not normalize MS Word chars
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
864
865
    /**
     * Convert a string (or an array of single characters) into Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string argument is split with str_split() first. Anything that is not an
     * array afterwards, as well as an empty array, yields an empty result.
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, every code point is additionally passed through
     *                                 int_to_hex() (U+xxxx format); by default the values
     *                                 returned by ord() are used as they are.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        // array_map() with a single array keeps the original keys
        $code_points = \array_map([self::class, 'ord'], $chars);

        if ($u_style !== true) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
916
917
    /**
     * Trim the string and squeeze every run of whitespace into a single space.
     *
     * Whitespace is whatever the "[[:space:]]" character class matches in
     * mb_ereg_replace() (when mbstring is flagged as supported) or in the
     * regex_replace() fallback.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $whitespace_run = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, $whitespace_run, ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace($whitespace_run, ' ', $str);

        return \trim($collapsed);
    }
935
936
    /**
     * Count how often each character occurs in a string.
     *
     * The string is split into chunks of length 1 via str_split() and the
     * resulting list is tallied with array_count_values().
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string
     *                                        (forwarded to str_split()).</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                        "mb_" functions (forwarded to str_split()).</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
960
961
    /**
     * Strip "@media ... { ... }" blocks from a CSS string.
     *
     * If preg_replace() fails (returns null) the result is an empty string.
     *
     * @param string $str <p>The CSS input.</p>
     *
     * @return string
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_rx = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';
        $stripped = \preg_replace($media_query_rx, '', $str);

        return (string) $stripped;
    }
976
977
    /**
     * Tell whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
987
988
    /**
     * Turn a decimal code point into its character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded by html_entity_decode() with ENT_QUOTES | ENT_HTML5.
     *
     * @param mixed $int <p>The decimal value.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = "&#{$int};";

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
999
1000
    /**
     * Decode a MIME header field.
     *
     * With iconv flagged as supported, iconv_mime_decode() is used in
     * "continue on error" mode. Otherwise the input is (for non UTF-8 targets)
     * passed through encode() and then decoded with mb_decode_mimeheader().
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($input);
    }
1026
1027
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key set that matches the mapping used while encoding
        $search = $use_reversible_string_mappings === true
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace($search, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1057
1058
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key set the emoji values are replaced with
        $replacements = $use_reversible_string_mappings === true
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $replacements, $str);
    }
1088
1089
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets, the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES"
     * are handled for both the target and the source side.
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When <b>TRUE</b> (default) the current
     *                                              string-encoding is auto-detected and a detected encoding
     *                                              overrides $from_encoding; if nothing can be detected the
     *                                              result of to_utf8() is returned.<br>
     *                                              When <b>FALSE</b>, $from_encoding is used (detection still
     *                                              runs if $from_encoding is empty).</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the input can not be JSON-encoded, or if it is not valid
     *                                   base64 while $from_encoding is "BASE64"
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding, null);
        }

        // nothing to do if source and target are known to be the same
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            // NOTE(review): the decoded value is used as the string from here on;
            // presumably the payload is expected to be a JSON string - confirm.
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // strict mode: base64_decode() returns false for invalid input,
            // which must not be passed on as if it were a string
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for base64_decode().');
            }

            $str = $decoded;
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT, 'UTF-8');
            $from_encoding = '';
        }

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding === true
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        if ($from_encoding_auto_detected !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            if ($str_encoded) {
                return $str_encoded;
            }
        }

        // last resort: iconv, and if that fails too the input is returned untouched
        $return = \iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        return $str;
    }
1248
1249
    /**
     * Encode a string as a MIME header field value via iconv_mime_encode().
     *
     * The field name passed to iconv_mime_encode() is always an empty string.
     *
     * @param string $str
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding ("scheme").</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed ("line-break-chars"),
     *                                  default is CRLF.</p>
     * @param int    $indent            [optional] <p>Set the max line length ("line-length").</p>
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        $str,
        $from_charset = 'UTF-8',
        $to_charset = 'UTF-8',
        $transfer_encoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transfer_encoding,
                'line-length'      => $indent,
                'input-charset'    => $from_charset,
                'output-charset'   => $to_charset,
                // must be real CR/LF bytes; a single-quoted '\\r\\n' would emit literal backslashes
                'line-break-chars' => $linefeed,
            ]
        );
    }
1289
1290
    /**
     * Build an excerpt of a text; if a search-string is given, the excerpt is
     * taken from around its (case-insensitive) first occurrence.
     *
     * Cuts are made at the next / previous space or dot, the cut edges are
     * trimmed and marked with $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
        $is_utf8 = $encoding === 'UTF-8';

        // native "mb_" calls for UTF-8, the class' own wrappers for everything else
        $len_of = static function (string $text) use ($is_utf8, $encoding): int {
            return (int) ($is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };
        $cut = static function (int $start, int $len) use ($str, $is_utf8, $encoding) {
            return $is_utf8 ? \mb_substr($str, $start, $len) : self::substr($str, $start, $len, $encoding);
        };
        $find = static function (string $needle, int $offset) use ($str, $is_utf8, $encoding) {
            return $is_utf8 ? \mb_strpos($str, $needle, $offset) : self::strpos($str, $needle, $offset, $encoding);
        };
        // position of the space / dot at or after $offset, as evaluated by min() (0 if that is false)
        $next_break = static function (int $offset) use ($find): int {
            return (int) \min($find(' ', $offset), $find('.', $offset));
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        if ($search === '') {
            $end = $length > 0 ? \min($length - 1, $len_of($str)) : 0;
            $pos = $next_break($end);

            if (!$pos) {
                return $str;
            }

            $head = $cut(0, $pos);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
        }

        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
        }
        $half_side = (int) ($word_position - $length / 2 + $len_of($search) / 2);

        // start the excerpt at the last space / dot before the "half side" position
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $cut(0, $half_side);
            if ($half_text !== false) {
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if (!$word_position || $half_side <= 0) {
            // no usable match position: cut from the beginning of the text
            $offset = \min($length - 1, (int) self::strlen($str, $encoding));
            $pos_end = $next_break($offset);

            if (!$pos_end) {
                return $str;
            }

            $piece = $cut(0, $pos_end);
            if ($piece === false) {
                return '';
            }

            return \rtrim($piece, $trim_chars) . $replacer_for_skipped_text;
        }

        $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));
        $pos_end = $next_break($offset) - $pos_start;

        if ($pos_end <= 0) {
            // no break after the offset: take everything from the start position on
            $piece = $cut($pos_start, $len_of($str));
            if ($piece === false) {
                return '';
            }

            return $replacer_for_skipped_text . \ltrim($piece, $trim_chars);
        }

        $piece = $cut($pos_start, $pos_end);
        if ($piece === false) {
            return '';
        }

        return $replacer_for_skipped_text . \trim($piece, $trim_chars) . $replacer_for_skipped_text;
    }
1477
1478
    /**
     * Reads entire file into a string and optionally converts it to UTF-8.
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read; it is passed through
     *                                        filter_var(..., FILTER_SANITIZE_STRING) before use.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to the native file_get_contents().
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If it is null and $timeout is
     *                                        truthy, a context with an "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null means 0).
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and is neither UTF-16 nor UTF-32) is not
     *                                        converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // the length argument is only passed when the caller actually set one
        if ($max_length === null) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        }

        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8 !== true) {
            return $data;
        }

        // convert text, and also UTF-16 / UTF-32 data even though it looks binary
        $is_convertible = self::is_binary($data, true) !== true
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::cleanup(self::encode('UTF-8', $data, false, $from_encoding));
        }

        return $data;
    }
1574
1575
    /**
     * Tell whether the content of a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read with the native file_get_contents() and the
     * content is checked by string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1594
1595
    /**
     * Normalize a value to the given Unicode normalization form (NFC by default).
     *
     * Arrays and objects are walked and every element / property is filtered
     * recursively. Strings get their line endings normalized if they contain "\r";
     * non-ASCII strings are normalized via \Normalizer, and re-encoded with
     * encode('UTF-8', ...) when the normalizer yields nothing. All other types
     * are returned unchanged.
     *
     * @param mixed  $var
     * @param int    $normalization_form
     * @param string $leading_combining  <p>Prefix that is put in front of a string starting
     *                                   with a combining mark (\p{Mn}).</p>
     *
     * @return mixed
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        switch (\gettype($var)) {
            case 'array':
            case 'object':
                /** @noinspection ForeachSourceInspection */
                foreach ($var as &$item) {
                    $item = self::filter($item, $normalization_form, $leading_combining);
                }
                unset($item);

                break;
            case 'string':
                if (\strpos($var, "\r") !== false) {
                    // Workaround https://bugs.php.net/65732
                    $var = self::normalize_line_ending($var);
                }

                // pure ASCII needs no normalization at all
                if (ASCII::is_ascii($var) !== false) {
                    break;
                }

                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // non-empty marker, so the isset() check below passes
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
                }

                $starts_with_combining_mark = $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }

                break;
        }

        return $var;
    }
1664
1665
    /**
1666
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1667
     *
1668
     * Gets a specific external variable by name and optionally filters it
1669
     *
1670
     * @see http://php.net/manual/en/function.filter-input.php
1671
     *
1672
     * @param int    $type          <p>
1673
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1674
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1675
     *                              <b>INPUT_ENV</b>.
1676
     *                              </p>
1677
     * @param string $variable_name <p>
1678
     *                              Name of a variable to get.
1679
     *                              </p>
1680
     * @param int    $filter        [optional] <p>
1681
     *                              The ID of the filter to apply. The
1682
     *                              manual page lists the available filters.
1683
     *                              </p>
1684
     * @param mixed  $options       [optional] <p>
1685
     *                              Associative array of options or bitwise disjunction of flags. If filter
1686
     *                              accepts options, flags can be provided in "flags" field of array.
1687
     *                              </p>
1688
     *
1689
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1690
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1691
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1692
     */
1693
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // An omitted 4th argument leaves $options at its null default, so a
        // single null-check covers both "not passed" and "passed as null".
        $raw = $options === null
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        // Hand the fetched value to self::filter() before returning it.
        return self::filter($raw);
    }
1707
1708
    /**
1709
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1710
     *
1711
     * Gets external variables and optionally filters them
1712
     *
1713
     * @see http://php.net/manual/en/function.filter-input-array.php
1714
     *
1715
     * @param int   $type       <p>
1716
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1717
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1718
     *                          <b>INPUT_ENV</b>.
1719
     *                          </p>
1720
     * @param mixed $definition [optional] <p>
1721
     *                          An array defining the arguments. A valid key is a string
1722
     *                          containing a variable name and a valid value is either a filter type, or an array
1723
     *                          optionally specifying the filter, flags and options. If the value is an
1724
     *                          array, valid keys are filter which specifies the
1725
     *                          filter type,
1726
     *                          flags which specifies any flags that apply to the
1727
     *                          filter, and options which specifies any options that
1728
     *                          apply to the filter. See the example below for a better understanding.
1729
     *                          </p>
1730
     *                          <p>
1731
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1732
     *                          input array are filtered by this filter.
1733
     *                          </p>
1734
     * @param bool  $add_empty  [optional] <p>
1735
     *                          Add missing keys as <b>NULL</b> to the return value.
1736
     *                          </p>
1737
     *
1738
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1739
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1740
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1741
     *               is not set and <b>NULL</b> if the filter fails.
1742
     */
1743
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // A missing 2nd argument leaves $definition at null, so the null-check
        // alone decides whether the native call gets a definition at all.
        $values = $definition === null
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        // Hand the fetched values to self::filter() before returning them.
        return self::filter($values);
    }
1756
1757
    /**
1758
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1759
     *
1760
     * Filters a variable with a specified filter
1761
     *
1762
     * @see http://php.net/manual/en/function.filter-var.php
1763
     *
1764
     * @param mixed $variable <p>
1765
     *                        Value to filter.
1766
     *                        </p>
1767
     * @param int   $filter   [optional] <p>
1768
     *                        The ID of the filter to apply. The
1769
     *                        manual page lists the available filters.
1770
     *                        </p>
1771
     * @param mixed $options  [optional] <p>
1772
     *                        Associative array of options or bitwise disjunction of flags. If filter
1773
     *                        accepts options, flags can be provided in "flags" field of array. For
1774
     *                        the "callback" filter, callable type should be passed. The
1775
     *                        callback must accept one argument, the value to be filtered, and return
1776
     *                        the value after filtering/sanitizing it.
1777
     *                        </p>
1778
     *                        <p>
1779
     *                        <code>
1780
     *                        // for filters that accept options, use this format
1781
     *                        $options = array(
1782
     *                        'options' => array(
1783
     *                        'default' => 3, // value to return if the filter fails
1784
     *                        // other options here
1785
     *                        'min_range' => 0
1786
     *                        ),
1787
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1788
     *                        );
1789
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1790
     *                        // for filter that only accept flags, you can pass them directly
1791
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1792
     *                        // for filter that only accept flags, you can also pass as an array
1793
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1794
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1795
     *                        // callback validate filter
1796
     *                        function foo($value)
1797
     *                        {
1798
     *                        // Expected format: Surname, GivenNames
1799
     *                        if (strpos($value, ", ") === false) return false;
1800
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1801
     *                        $empty = (empty($surname) || empty($givennames));
1802
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1803
     *                        if ($empty || $notstrings) {
1804
     *                        return false;
1805
     *                        } else {
1806
     *                        return $value;
1807
     *                        }
1808
     *                        }
1809
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1810
     *                        </code>
1811
     *                        </p>
1812
     *
1813
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1814
     */
1815 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Never forward a null $options to the native function: its third
        // parameter is not nullable, so an explicit null would raise a
        // TypeError on PHP 8 under strict_types. An omitted argument also
        // arrives here as null, which keeps this in line with filter_input().
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        // Hand the filtered value to self::filter() before returning it.
        return self::filter($variable);
    }
1828
1829
    /**
1830
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1831
     *
1832
     * Gets multiple variables and optionally filters them
1833
     *
1834
     * @see http://php.net/manual/en/function.filter-var-array.php
1835
     *
1836
     * @param array<mixed> $data       <p>
1837
     *                                 An array with string keys containing the data to filter.
1838
     *                                 </p>
1839
     * @param mixed        $definition [optional] <p>
1840
     *                                 An array defining the arguments. A valid key is a string
1841
     *                                 containing a variable name and a valid value is either a
1842
     *                                 filter type, or an
1843
     *                                 array optionally specifying the filter, flags and options.
1844
     *                                 If the value is an array, valid keys are filter
1845
     *                                 which specifies the filter type,
1846
     *                                 flags which specifies any flags that apply to the
1847
     *                                 filter, and options which specifies any options that
1848
     *                                 apply to the filter. See the example below for a better understanding.
1849
     *                                 </p>
1850
     *                                 <p>
1851
     *                                 This parameter can be also an integer holding a filter constant. Then all values in the
1852
     *                                 input array are filtered by this filter.
1853
     *                                 </p>
1854
     * @param bool         $add_empty  [optional] <p>
1855
     *                                 Add missing keys as <b>NULL</b> to the return value.
1856
     *                                 </p>
1857
     *
1858
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1859
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1860
     *               set
1861
     */
1862 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // A null $definition (omitted or passed explicitly) must not reach the
        // native function: its second parameter is not nullable, so null would
        // raise a TypeError on PHP 8 under strict_types. In that case the
        // native defaults are used, matching filter_input_array().
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        // Hand the filtered values to self::filter() before returning them.
        return self::filter($a);
    }
1875
1876
    /**
1877
     * Checks whether finfo is available on the server.
1878
     *
1879
     * @return bool
1880
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1881
     */
1882
    public static function finfo_loaded(): bool
    {
        // The check is purely "does a class named finfo exist?".
        $has_finfo = \class_exists('finfo');

        return $has_finfo;
    }
1886
1887
    /**
1888
     * Returns the first $n characters of the string.
1889
     *
1890
     * @param string $str      <p>The input string.</p>
1891
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1892
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1893
     *
1894
     * @return string
1895
     */
1896 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to extract from an empty string or for a non-positive count.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // The default charset goes straight to mbstring; every other charset
        // is delegated to self::substr() together with the encoding name.
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1911
1912
    /**
1913
     * Check if the number of Unicode characters isn't greater than the specified integer.
1914
     *
1915
     * @param string $str      the original string to be checked
1916
     * @param int    $box_size the size in number of chars to be checked against string
1917
     *
1918
     * @return bool true if string is less than or equal to $box_size, false otherwise
1919
     */
1920 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by self::strlen(), forced to int for the comparison.
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
1924
1925
    /**
1926
     * Try to fix simple broken UTF-8 strings.
1927
     *
1928
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1929
     *
1930
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1931
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1932
     * See: http://en.wikipedia.org/wiki/Windows-1252
1933
     *
1934
     * @param string $str <p>The input string</p>
1935
     *
1936
     * @return string
1937
     */
1938 47
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replacement lists are built on first use and then kept in
        // static locals so later calls can reuse them.
        static $search = null;
        static $replace = null;

        if ($search === null) {
            // Load the "utf8_fix" data table lazily if nobody did so yet.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $search = \array_keys(self::$BROKEN_UTF8_FIX);
            $replace = \array_values(self::$BROKEN_UTF8_FIX);
        }

        // Plain byte-wise replacement of every table key by its value.
        return \str_replace($search, $replace, $str);
    }
1958
1959
    /**
1960
     * Fix a double (or multiple) encoded UTF8 string.
1961
     *
1962
     * @param string|string[] $str you can use a string or an array of strings
1963
     *
1964
     * @return string|string[]
1965
     *                         Will return the fixed input-"array" or
1966
     *                         the fixed input-"string"
1967
     *
1968
     * @psalm-suppress InvalidReturnType
1969
     */
1970 2
    public static function fix_utf8($str)
    {
        // Arrays are fixed element by element (recursively, keys untouched).
        if (\is_array($str) === true) {
            foreach (\array_keys($str) as $key) {
                $str[$key] = self::fix_utf8($str[$key]);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // Repeat the decode -> re-encode round trip until a pass leaves the
        // string unchanged; an empty string never enters the loop.
        $previous = '';
        while ($previous !== $str) {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($str, true);
            $str = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2001
2002
    /**
     * Get the text direction of a specific character.
     *
     * If the "intlChar" support flag is set, the result of
     * \IntlChar::charDirection() is mapped to 'LTR' / 'RTL' first. When that
     * yields no decision (or the flag is not set), the code point is checked
     * against a hard-coded list of right-to-left code points and ranges.
     *
     * @param string $char <p>The character to inspect.</p>
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        // Numeric direction values as returned by \IntlChar::charDirection()
        // (from "IntlChar"-Class); built once instead of on every call.
        static $char_direction = [
            'RTL' => [1, 13, 14, 15, 21],
            'LTR' => [0, 11, 12, 20],
        ];

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $tmp_return = \IntlChar::charDirection($char);

            if (\in_array($tmp_return, $char_direction['LTR'], true)) {
                return 'LTR';
            }

            if (\in_array($tmp_return, $char_direction['RTL'], true)) {
                return 'RTL';
            }

            // Any other value is undecided here: fall through to the table below.
        }

        // The class is final, so late static binding is pointless: use self::.
        $c = self::chr_to_decimal($char);

        // Everything outside of [0x5be, 0x10b7f] is treated as left-to-right.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        if ($c <= 0x85e) {
            if ($c === 0x5be ||
                $c === 0x5c0 ||
                $c === 0x5c3 ||
                $c === 0x5c6 ||
                ($c >= 0x5d0 && $c <= 0x5ea) ||
                ($c >= 0x5f0 && $c <= 0x5f4) ||
                $c === 0x608 ||
                $c === 0x60b ||
                $c === 0x60d ||
                $c === 0x61b ||
                ($c >= 0x61e && $c <= 0x64a) ||
                ($c >= 0x66d && $c <= 0x66f) ||
                ($c >= 0x671 && $c <= 0x6d5) ||
                ($c >= 0x6e5 && $c <= 0x6e6) ||
                ($c >= 0x6ee && $c <= 0x6ef) ||
                ($c >= 0x6fa && $c <= 0x70d) ||
                $c === 0x710 ||
                ($c >= 0x712 && $c <= 0x72f) ||
                ($c >= 0x74d && $c <= 0x7a5) ||
                $c === 0x7b1 ||
                ($c >= 0x7c0 && $c <= 0x7ea) ||
                ($c >= 0x7f4 && $c <= 0x7f5) ||
                $c === 0x7fa ||
                ($c >= 0x800 && $c <= 0x815) ||
                $c === 0x81a ||
                $c === 0x824 ||
                $c === 0x828 ||
                ($c >= 0x830 && $c <= 0x83e) ||
                ($c >= 0x840 && $c <= 0x858) ||
                $c === 0x85e
            ) {
                return 'RTL';
            }
        } elseif ($c === 0x200f) {
            // The only right-to-left code point listed between 0x85e and 0xfb1d.
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            if ($c === 0xfb1d ||
                ($c >= 0xfb1f && $c <= 0xfb28) ||
                ($c >= 0xfb2a && $c <= 0xfb36) ||
                ($c >= 0xfb38 && $c <= 0xfb3c) ||
                $c === 0xfb3e ||
                ($c >= 0xfb40 && $c <= 0xfb41) ||
                ($c >= 0xfb43 && $c <= 0xfb44) ||
                ($c >= 0xfb46 && $c <= 0xfbc1) ||
                ($c >= 0xfbd3 && $c <= 0xfd3d) ||
                ($c >= 0xfd50 && $c <= 0xfd8f) ||
                ($c >= 0xfd92 && $c <= 0xfdc7) ||
                ($c >= 0xfdf0 && $c <= 0xfdfc) ||
                ($c >= 0xfe70 && $c <= 0xfe74) ||
                ($c >= 0xfe76 && $c <= 0xfefc) ||
                ($c >= 0x10800 && $c <= 0x10805) ||
                $c === 0x10808 ||
                ($c >= 0x1080a && $c <= 0x10835) ||
                ($c >= 0x10837 && $c <= 0x10838) ||
                $c === 0x1083c ||
                ($c >= 0x1083f && $c <= 0x10855) ||
                ($c >= 0x10857 && $c <= 0x1085f) ||
                ($c >= 0x10900 && $c <= 0x1091b) ||
                ($c >= 0x10920 && $c <= 0x10939) ||
                $c === 0x1093f ||
                $c === 0x10a00 ||
                ($c >= 0x10a10 && $c <= 0x10a13) ||
                ($c >= 0x10a15 && $c <= 0x10a17) ||
                ($c >= 0x10a19 && $c <= 0x10a33) ||
                ($c >= 0x10a40 && $c <= 0x10a47) ||
                ($c >= 0x10a50 && $c <= 0x10a58) ||
                ($c >= 0x10a60 && $c <= 0x10a7f) ||
                ($c >= 0x10b00 && $c <= 0x10b35) ||
                ($c >= 0x10b40 && $c <= 0x10b55) ||
                ($c >= 0x10b58 && $c <= 0x10b72) ||
                ($c >= 0x10b78 && $c <= 0x10b7f)
            ) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2115
2116
    /**
2117
     * Check for php-support.
2118
     *
2119
     * @param string|null $key
2120
     *
2121
     * @return mixed
2122
     *               Return the full support-"array", if $key === null<br>
2123
     *               return bool-value, if $key is used and available<br>
2124
     *               otherwise return <strong>null</strong>
2125
     */
2126 27
    public static function getSupportInfo(string $key = null)
    {
        // Without a key the caller receives the complete support map as-is.
        if ($key === null) {
            return self::$SUPPORT;
        }

        // Load the transliterator list lazily on the first keyed lookup.
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        // Unknown (or null-valued) keys yield null.
        if (isset(self::$SUPPORT[$key])) {
            return self::$SUPPORT[$key];
        }

        return null;
    }
2140
2141
    /**
2142
     * Warning: this method only works for some file-types (png, jpg)
2143
     *          if you need more supported types, please use e.g. "finfo"
2144
     *
2145
     * @param string $str
2146
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
2147
     *
2148
     * @return array<string, string|null>
2149
     *                       <p>with this keys: 'ext', 'mime', 'type'</p>
2150
     *
2151
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
2152
     */
2153 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // The signature check needs the first two bytes; anything shorter
        // (including the empty string) gets the fallback.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // Decimal values of byte 1 and byte 2, glued together as text and then
        // read as one integer (e.g. 0xFF 0xD8 -> "255" . "216" -> 255216).
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        if ($type_code === 255216) {
            return [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ];
        }

        if ($type_code === 13780) {
            return [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ];
        }

        return $fallback;
    }
2220
2221
    /**
2222
     * @param int    $length         <p>Length of the random string.</p>
2223
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2224
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2225
     *
2226
     * @return string
2227
     */
2228 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        $result = '';
        $picked = 0;

        if ($encoding !== 'UTF-8') {
            // Non-default charset: go through the encoding-aware helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $pool_size = (int) self::strlen($possible_chars, $encoding);
            if ($pool_size === 0) {
                return '';
            }

            while ($picked < $length) {
                try {
                    $index = \random_int(0, $pool_size - 1);
                } catch (\Exception $e) {
                    /** @noinspection RandomApiMigrationInspection */
                    $index = \mt_rand(0, $pool_size - 1);
                }

                $char = self::substr($possible_chars, $index, 1, $encoding);
                if ($char === false) {
                    // Failed extraction: draw again without counting this round.
                    continue;
                }

                $result .= $char;
                ++$picked;
            }

            return $result;
        }

        // Default charset: use mbstring directly.
        $pool_size = (int) \mb_strlen($possible_chars);
        if ($pool_size === 0) {
            return '';
        }

        while ($picked < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = \mb_substr($possible_chars, $index, 1);
            if ($char === false) {
                // Failed extraction: draw again without counting this round.
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2285
2286
    /**
     * Build a string that is very likely unique, based on a random number,
     * the session id, the remote / server address, the given extra entropy
     * and "uniqid()".
     *
     * INFO: the optional md5-hash only shortens / normalizes the result, it is
     *       not a security feature.
     *
     * @param int|string $entropy_extra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropy_extra = '', bool $use_md5 = true): string
    {
        // "random_int()" throws if no randomness source is available; fall
        // back to "mt_rand()" like "get_random_string()" does instead of
        // letting the exception escape.
        try {
            $rand_int = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $rand_int = \mt_rand(0, \mt_getrandmax());
        }

        $unique_helper = $rand_int .
                         \session_id() .
                         ($_SERVER['REMOTE_ADDR'] ?? '') .
                         ($_SERVER['SERVER_ADDR'] ?? '') .
                         $entropy_extra;

        // The helper is used as prefix, "uniqid()" appends its own time-based part.
        $unique_string = \uniqid($unique_helper, true);

        if ($use_md5) {
            $unique_string = \md5($unique_string . $unique_helper);
        }

        return $unique_string;
    }
2308
2309
    /**
2310
     * alias for "UTF8::string_has_bom()"
2311
     *
2312
     * @param string $str
2313
     *
2314
     * @return bool
2315
     *
2316
     * @see UTF8::string_has_bom()
2317
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
2318
     */
2319 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias: forwards the input unchanged to string_has_bom().
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2323
2324
    /**
2325
     * Returns true if the string contains a lower case char, false otherwise.
2326
     *
2327
     * @param string $str <p>The input string.</p>
2328
     *
2329
     * @return bool
2330
     *              <p>Whether or not the string contains a lower case character.</p>
2331
     */
2332 47
    public static function has_lowercase(string $str): bool
    {
        // "anything, followed by one lower case character"
        $pattern = '.*[[:lower:]]';

        // Without mbstring support the generic pattern helper is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2341
2342
    /**
2343
     * Returns true if the string contains whitespace, false otherwise.
2344
     *
2345
     * @param string $str <p>The input string.</p>
2346
     *
2347
     * @return bool
2348
     *              <p>Whether or not the string contains whitespace.</p>
2349
     */
2350 11
    public static function has_whitespace(string $str): bool
    {
        // "anything, followed by one whitespace character"
        $pattern = '.*[[:space:]]';

        // Without mbstring support the generic pattern helper is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2359
2360
    /**
2361
     * Returns true if the string contains an upper case char, false otherwise.
2362
     *
2363
     * @param string $str <p>The input string.</p>
2364
     *
2365
     * @return bool whether or not the string contains an upper case character
2366
     */
2367 12
    public static function has_uppercase(string $str): bool
    {
        // "anything, followed by one upper case character"
        $pattern = '.*[[:upper:]]';

        // Without mbstring support the generic pattern helper is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2376
2377
    /**
2378
     * Converts a hexadecimal value into a UTF-8 character.
2379
     *
2380
     * @param string $hexdec <p>The hexadecimal value.</p>
2381
     *
2382
     * @return false|string one single UTF-8 character
2383
     */
2384 4
    public static function hex_to_chr(string $hexdec)
    {
        // Hex string -> number, then number -> character via decimal_to_chr().
        $code_point = \hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2388
2389
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * Accepted forms are "U+xxxx", "\uxxxx" and plain "xxxx" with 4 to 6
     * hexadecimal digits; everything else (e.g. non-hex letters) is a failure.
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are allowed: with "[a-zA-Z0-9]" an input such as
        // "zzzz" matched and was silently turned into 0 by "intval()".
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2413
2414
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::html_entity_decode()".
     *
     * @param string   $str      <p>The input string.</p>
     * @param int|null $flags    [optional] <p>Passed through as is.</p>
     * @param string   $encoding [optional] <p>Passed through as is.</p>
     *
     * @return string
     *
     * @see UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2433
2434
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * With mbstring available the work is done by "mb_encode_numericentity()",
     * otherwise every character is encoded via "UTF8::single_chr_html_encode()".
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // when ASCII is kept, the converted range starts right after the 7-bit chars
            $start_code = 0x00;
            if ($keep_ascii_chars === true) {
                $start_code = 0x80;
            }

            // One convmap entry is exactly four integers: [start, end, offset, mask].
            // The element count must be a multiple of four, otherwise
            // "mb_encode_numericentity()" throws a ValueError on PHP >= 8.0.
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity(
                    $str,
                    $convmap
                );
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity(
                $str,
                $convmap,
                $encoding
            );
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2501
2502
    /**
2503
     * UTF-8 version of html_entity_decode()
2504
     *
2505
     * The reason we are not using html_entity_decode() by itself is because
2506
     * while it is not technically correct to leave out the semicolon
2507
     * at the end of an entity most browsers will still interpret the entity
2508
     * correctly. html_entity_decode() does not convert entities without
2509
     * semicolons, so we are left with our own little solution here. Bummer.
2510
     *
2511
     * Convert all HTML entities to their applicable characters
2512
     *
2513
     * INFO: opposite to UTF8::html_encode()
2514
     *
2515
     * @see http://php.net/manual/en/function.html-entity-decode.php
2516
     *
2517
     * @param string $str      <p>
2518
     *                         The input string.
2519
     *                         </p>
2520
     * @param int    $flags    [optional] <p>
2521
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2522
     *                         and which document type to use. If omitted (null), ENT_QUOTES | ENT_HTML5 is used.
2523
     *                         <table>
2524
     *                         Available <i>flags</i> constants
2525
     *                         <tr valign="top">
2526
     *                         <td>Constant Name</td>
2527
     *                         <td>Description</td>
2528
     *                         </tr>
2529
     *                         <tr valign="top">
2530
     *                         <td><b>ENT_COMPAT</b></td>
2531
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2532
     *                         </tr>
2533
     *                         <tr valign="top">
2534
     *                         <td><b>ENT_QUOTES</b></td>
2535
     *                         <td>Will convert both double and single quotes.</td>
2536
     *                         </tr>
2537
     *                         <tr valign="top">
2538
     *                         <td><b>ENT_NOQUOTES</b></td>
2539
     *                         <td>Will leave both double and single quotes unconverted.</td>
2540
     *                         </tr>
2541
     *                         <tr valign="top">
2542
     *                         <td><b>ENT_HTML401</b></td>
2543
     *                         <td>
2544
     *                         Handle code as HTML 4.01.
2545
     *                         </td>
2546
     *                         </tr>
2547
     *                         <tr valign="top">
2548
     *                         <td><b>ENT_XML1</b></td>
2549
     *                         <td>
2550
     *                         Handle code as XML 1.
2551
     *                         </td>
2552
     *                         </tr>
2553
     *                         <tr valign="top">
2554
     *                         <td><b>ENT_XHTML</b></td>
2555
     *                         <td>
2556
     *                         Handle code as XHTML.
2557
     *                         </td>
2558
     *                         </tr>
2559
     *                         <tr valign="top">
2560
     *                         <td><b>ENT_HTML5</b></td>
2561
     *                         <td>
2562
     *                         Handle code as HTML 5.
2563
     *                         </td>
2564
     *                         </tr>
2565
     *                         </table>
2566
     *                         </p>
2567
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2568
     *
2569
     * @return string the decoded string
2570
     */
2571 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // fast exit: fewer than four bytes (examples: &; || &x;) or no "&" at all
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // warn only for encodings outside of this short list, and only if mbstring is missing
        $is_listed_encoding = $encoding === 'UTF-8'
                              ||
                              $encoding === 'ISO-8859-1'
                              ||
                              $encoding === 'WINDOWS-1252';
        if (!$is_listed_encoding && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // repeat until a pass changes nothing, so nested entities (e.g. "&amp;lt;") are resolved too
        do {
            $previous = $str;

            if (\strpos($str, '&') === false) {
                // nothing left that could be an entity
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // numeric & UTF16 two byte entities without a trailing semicolon
                // get the missing ";" appended, so that they are decoded as well
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($previous !== $str);

        return $str;
    }
2627
2628
    /**
     * Escapes a string for html output by delegating to "UTF8::htmlspecialchars()".
     *
     * The flags are fixed to ENT_QUOTES | ENT_SUBSTITUTE: both quote styles are converted
     * and invalid code unit sequences are replaced instead of producing an empty string.
     *
     * @param string $str      <p>The string to escape.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2644
2645
    /**
     * Strips html-tags that contain nothing but optional whitespace.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // an opening tag without "/", optional whitespace, directly followed by a closing tag
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        return (string) $stripped;
    }
2662
2663
    /**
2664
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2665
     *
2666
     * @see http://php.net/manual/en/function.htmlentities.php
2667
     *
2668
     * @param string $str           <p>
2669
     *                              The input string.
2670
     *                              </p>
2671
     * @param int    $flags         [optional] <p>
2672
     *                              A bitmask of one or more of the following flags, which specify how to handle
2673
     *                              quotes, invalid code unit sequences and the used document type. The default is
2674
     *                              ENT_COMPAT | ENT_HTML401.
2675
     *                              <table>
2676
     *                              Available <i>flags</i> constants
2677
     *                              <tr valign="top">
2678
     *                              <td>Constant Name</td>
2679
     *                              <td>Description</td>
2680
     *                              </tr>
2681
     *                              <tr valign="top">
2682
     *                              <td><b>ENT_COMPAT</b></td>
2683
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2684
     *                              </tr>
2685
     *                              <tr valign="top">
2686
     *                              <td><b>ENT_QUOTES</b></td>
2687
     *                              <td>Will convert both double and single quotes.</td>
2688
     *                              </tr>
2689
     *                              <tr valign="top">
2690
     *                              <td><b>ENT_NOQUOTES</b></td>
2691
     *                              <td>Will leave both double and single quotes unconverted.</td>
2692
     *                              </tr>
2693
     *                              <tr valign="top">
2694
     *                              <td><b>ENT_IGNORE</b></td>
2695
     *                              <td>
2696
     *                              Silently discard invalid code unit sequences instead of returning
2697
     *                              an empty string. Using this flag is discouraged as it
2698
     *                              may have security implications.
2699
     *                              </td>
2700
     *                              </tr>
2701
     *                              <tr valign="top">
2702
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2703
     *                              <td>
2704
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2705
     *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty
2706
     *                              string.
2707
     *                              </td>
2708
     *                              </tr>
2709
     *                              <tr valign="top">
2710
     *                              <td><b>ENT_DISALLOWED</b></td>
2711
     *                              <td>
2712
     *                              Replace invalid code points for the given document type with a
2713
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
2714
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2715
     *                              instance, to ensure the well-formedness of XML documents with
2716
     *                              embedded external content.
2717
     *                              </td>
2718
     *                              </tr>
2719
     *                              <tr valign="top">
2720
     *                              <td><b>ENT_HTML401</b></td>
2721
     *                              <td>
2722
     *                              Handle code as HTML 4.01.
2723
     *                              </td>
2724
     *                              </tr>
2725
     *                              <tr valign="top">
2726
     *                              <td><b>ENT_XML1</b></td>
2727
     *                              <td>
2728
     *                              Handle code as XML 1.
2729
     *                              </td>
2730
     *                              </tr>
2731
     *                              <tr valign="top">
2732
     *                              <td><b>ENT_XHTML</b></td>
2733
     *                              <td>
2734
     *                              Handle code as XHTML.
2735
     *                              </td>
2736
     *                              </tr>
2737
     *                              <tr valign="top">
2738
     *                              <td><b>ENT_HTML5</b></td>
2739
     *                              <td>
2740
     *                              Handle code as HTML 5.
2741
     *                              </td>
2742
     *                              </tr>
2743
     *                              </table>
2744
     *                              </p>
2745
     * @param string $encoding      [optional] <p>
2746
     *                              Like <b>htmlspecialchars</b>,
2747
     *                              <b>htmlentities</b> takes an optional third argument
2748
     *                              <i>encoding</i> which defines encoding used in
2749
     *                              conversion.
2750
     *                              Although this argument is technically optional, you are highly
2751
     *                              encouraged to specify the correct value for your code.
2752
     *                              </p>
2753
     * @param bool   $double_encode [optional] <p>
2754
     *                              When <i>double_encode</i> is turned off PHP will not
2755
     *                              encode existing html entities. The default is to convert everything.
2756
     *                              </p>
2757
     *
2758
     * @return string
2759
     *                <p>
2760
     *                The encoded string.
2761
     *                <br><br>
2762
     *                If the input <i>string</i> contains an invalid code unit
2763
     *                sequence within the given <i>encoding</i> an empty string
2764
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2765
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2766
     *                </p>
2767
     */
2768 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given, every other name gets normalized first
        $needs_normalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * PHP does not convert a backslash into its html entity, since the backslash
         * is mostly used to escape characters before inserting them into a database.
         * That job belongs to the database layer, so the backslash is replaced by
         * its html entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // remaining chars are turned into numbered entities, ASCII chars are kept
        return self::html_encode($encoded, true, $encoding);
    }
2797
2798
    /**
2799
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2800
     *
2801
     * INFO: Take a look at "UTF8::htmlentities()"
2802
     *
2803
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2804
     *
2805
     * @param string $str           <p>
2806
     *                              The string being converted.
2807
     *                              </p>
2808
     * @param int    $flags         [optional] <p>
2809
     *                              A bitmask of one or more of the following flags, which specify how to handle
2810
     *                              quotes, invalid code unit sequences and the used document type. The default is
2811
     *                              ENT_COMPAT | ENT_HTML401.
2812
     *                              <table>
2813
     *                              Available <i>flags</i> constants
2814
     *                              <tr valign="top">
2815
     *                              <td>Constant Name</td>
2816
     *                              <td>Description</td>
2817
     *                              </tr>
2818
     *                              <tr valign="top">
2819
     *                              <td><b>ENT_COMPAT</b></td>
2820
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2821
     *                              </tr>
2822
     *                              <tr valign="top">
2823
     *                              <td><b>ENT_QUOTES</b></td>
2824
     *                              <td>Will convert both double and single quotes.</td>
2825
     *                              </tr>
2826
     *                              <tr valign="top">
2827
     *                              <td><b>ENT_NOQUOTES</b></td>
2828
     *                              <td>Will leave both double and single quotes unconverted.</td>
2829
     *                              </tr>
2830
     *                              <tr valign="top">
2831
     *                              <td><b>ENT_IGNORE</b></td>
2832
     *                              <td>
2833
     *                              Silently discard invalid code unit sequences instead of returning
2834
     *                              an empty string. Using this flag is discouraged as it
2835
     *                              may have security implications.
2836
     *                              </td>
2837
     *                              </tr>
2838
     *                              <tr valign="top">
2839
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2840
     *                              <td>
2841
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2842
     *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty
2843
     *                              string.
2844
     *                              </td>
2845
     *                              </tr>
2846
     *                              <tr valign="top">
2847
     *                              <td><b>ENT_DISALLOWED</b></td>
2848
     *                              <td>
2849
     *                              Replace invalid code points for the given document type with a
2850
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
2851
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2852
     *                              instance, to ensure the well-formedness of XML documents with
2853
     *                              embedded external content.
2854
     *                              </td>
2855
     *                              </tr>
2856
     *                              <tr valign="top">
2857
     *                              <td><b>ENT_HTML401</b></td>
2858
     *                              <td>
2859
     *                              Handle code as HTML 4.01.
2860
     *                              </td>
2861
     *                              </tr>
2862
     *                              <tr valign="top">
2863
     *                              <td><b>ENT_XML1</b></td>
2864
     *                              <td>
2865
     *                              Handle code as XML 1.
2866
     *                              </td>
2867
     *                              </tr>
2868
     *                              <tr valign="top">
2869
     *                              <td><b>ENT_XHTML</b></td>
2870
     *                              <td>
2871
     *                              Handle code as XHTML.
2872
     *                              </td>
2873
     *                              </tr>
2874
     *                              <tr valign="top">
2875
     *                              <td><b>ENT_HTML5</b></td>
2876
     *                              <td>
2877
     *                              Handle code as HTML 5.
2878
     *                              </td>
2879
     *                              </tr>
2880
     *                              </table>
2881
     *                              </p>
2882
     * @param string $encoding      [optional] <p>
2883
     *                              Defines encoding used in conversion.
2884
     *                              </p>
2885
     *                              <p>
2886
     *                              For the purposes of this function, the encodings
2887
     *                              ISO-8859-1, ISO-8859-15,
2888
     *                              UTF-8, cp866,
2889
     *                              cp1251, cp1252, and
2890
     *                              KOI8-R are effectively equivalent, provided the
2891
     *                              <i>string</i> itself is valid for the encoding, as
2892
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2893
     *                              the same positions in all of these encodings.
2894
     *                              </p>
2895
     * @param bool   $double_encode [optional] <p>
2896
     *                              When <i>double_encode</i> is turned off PHP will not
2897
     *                              encode existing html entities, the default is to convert everything.
2898
     *                              </p>
2899
     *
2900
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
2907
     */
2908 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given, every other name gets normalized first
        $is_used_as_given = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_used_as_given) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $escaped = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $escaped;
    }
2925
2926
    /**
     * Tells whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
2936
2937
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::decimal_to_chr()".
     *
     * @param mixed $int
     *
     * @return string
     *
     * @see UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2951
2952
    /**
     * Builds the hexadecimal code point notation (e.g. "U+00f1") for an integer.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text that is put in front of the hex digits.</p>
     *
     * @return string the prefix followed by the lower case hex digits, zero-padded to at least four digits
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // shorter values are filled up with leading zeros, longer ones are kept as they are
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $digits;
    }
2970
2971
    /**
     * Tells whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
2981
2982
    /**
     * Tells whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
2992
2993
    /**
     * Deprecated alias of "UTF8::is_ascii()"; the call is forwarded to "ASCII::is_ascii()".
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3007
3008
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::is_base64()".
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3022
3023
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::is_binary()".
     *
     * @param mixed $str
     * @param bool  $strict
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3038
3039
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::is_bom()".
     *
     * @param string $utf8_chr
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3053
3054
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::is_html()".
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3068
3069
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::is_json()".
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3083
3084
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::is_utf16()".
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3101
3102
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::is_utf32()".
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3119
3120
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::is_utf8()".
     *
     * @param string $str
     * @param bool   $strict
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3135
3136
    /**
     * Checks whether the string consists of alphabetic chars only.
     *
     * @param string $str <p>The string to inspect.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        // zero or more characters of the POSIX class "alpha", from start to end
        $pattern = '^[[:alpha:]]*$';

        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3153
3154
    /**
3155
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3156
     *
3157
     * @param string $str <p>The input string.</p>
3158
     *
3159
     * @return bool
3160
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3161
     */
3162 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphabetic / numeric chars, nothing else.
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3171
3172
    /**
3173
     * Checks if a string is 7 bit ASCII.
3174
     *
3175
     * @param string $str <p>The string to check.</p>
3176
     *
3177
     * @return bool
3178
     *              <p>
3179
     *              <strong>true</strong> if it is ASCII<br>
3180
     *              <strong>false</strong> otherwise
3181
     *              </p>
3182
     */
3183 8
    public static function is_ascii(string $str): bool
    {
        // The actual check lives in the ASCII helper class; this is a pass-through.
        $only_ascii = ASCII::is_ascii($str);

        return $only_ascii;
    }
3187
3188
    /**
3189
     * Returns true if the string is base64 encoded, false otherwise.
3190
     *
3191
     * @param mixed|string $str                   <p>The input string.</p>
3192
     * @param bool         $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3193
     *
3194
     * @return bool whether or not $str is base64 encoded
3195
     */
3196 16
    public static function is_base64($str, $empty_string_is_valid = false): bool
    {
        // An empty string is only accepted when the caller explicitly allows it.
        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        // Anything that is not a string can never be base64.
        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: only canonical base64 re-encodes to exactly the input.
        return \base64_encode($decoded) === $str;
    }
3217
3218
    /**
3219
     * Check if the input is binary... (this looks like a hack).
3220
     *
3221
     * @param mixed $input
3222
     * @param bool  $strict
3223
     *
3224
     * @return bool
3225
     */
3226 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;

        // Nothing to inspect: an empty value is never reported as binary.
        if ($input === '') {
            return false;
        }

        // A string made up solely of "0" and "1" digits counts as binary.
        $only_bits = \preg_match('~^[01]+$~', $input);
        if ($only_bits) {
            return true;
        }

        // Accept a "binary" verdict from the file-type detection helper.
        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than a quarter NUL bytes is treated as binary data.
        $byte_count = \strlen($input);
        $nul_count = \substr_count($input, "\x0", 0, $byte_count);
        if (($nul_count / $byte_count) > 0.25) {
            return true;
        }

        // The fileinfo based check below only runs in strict mode.
        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detected_encoding === 'binary';
    }
3262
3263
    /**
3264
     * Check if the file is binary.
3265
     *
3266
     * @param string $file
3267
     *
3268
     * @return bool
3269
     */
3270 6
    public static function is_binary_file($file): bool
    {
        // Only the first 512 bytes of the file are sampled.
        $chunk = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            $chunk = \fread($handle, 512);
            \fclose($handle);
        }

        // Unreadable or empty file: report "not binary"; otherwise run the strict check.
        return $chunk === '' ? false : self::is_binary($chunk, true);
    }
3287
3288
    /**
3289
     * Returns true if the string contains only whitespace chars, false otherwise.
3290
     *
3291
     * @param string $str <p>The input string.</p>
3292
     *
3293
     * @return bool
3294
     *              <p>Whether or not $str contains only whitespace characters.</p>
3295
     */
3296 15
    public static function is_blank(string $str): bool
    {
        // Zero or more whitespace chars, nothing else.
        $pattern = '^[[:space:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3305
3306
    /**
3307
     * Checks if the given string is equal to any "Byte Order Mark".
3308
     *
3309
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3310
     *
3311
     * @param string $str <p>The input string.</p>
3312
     *
3313
     * @return bool
3314
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3315
     */
3316 2
    public static function is_bom($str): bool
    {
        // self::$BOM is keyed by the BOM strings themselves, so "is $str strictly
        // equal to one of the keys" is a key lookup. The is_string() guard keeps
        // the strict (===) semantics: non-string input can never match a key.
        // This also avoids iterating the shared static table by reference.
        return \is_string($str) && \array_key_exists($str, self::$BOM);
    }
3327
3328
    /**
3329
     * Determine whether the string is considered to be empty.
3330
     *
3331
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3332
     * empty() does not generate a warning if the variable does not exist.
3333
     *
3334
     * @param mixed $str
3335
     *
3336
     * @return bool whether or not $str is empty()
3337
     */
3338
    public static function is_empty($str): bool
    {
        // For an already-defined variable, empty() is the negated boolean cast.
        return !(bool) $str;
    }
3342
3343
    /**
3344
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3345
     *
3346
     * @param string $str <p>The input string.</p>
3347
     *
3348
     * @return bool
3349
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3350
     */
3351 13
    public static function is_hexadecimal(string $str): bool
    {
        // Zero or more hexadecimal digits, nothing else.
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3360
3361
    /**
3362
     * Check if the string contains any HTML tags.
3363
     *
3364
     * @param string $str <p>The input string.</p>
3365
     *
3366
     * @return bool
3367
     *              <p>Whether or not $str contains html elements.</p>
3368
     */
3369 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // Opening, closing or self-closing tag, optionally carrying attributes.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        $found = [];
        \preg_match($tag_pattern, $encoded, $found);

        // A non-empty match list means at least one tag was found.
        return $found !== [];
    }
3384
3385
    /**
3386
     * Try to check if "$str" is a JSON-string.
3387
     *
3388
     * @param string $str                                    <p>The input string.</p>
3389
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json results.</p>
3390
     *
3391
     * @return bool
3392
     *              <p>Whether or not the $str is in JSON format.</p>
3393
     */
3394 42
    public static function is_json(
        string $str,
        $only_array_or_object_results_are_valid = true
    ): bool {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only acceptable when the input spells "null" (any case).
        if ($decoded === null) {
            if (\strtoupper($str) !== 'NULL') {
                return false;
            }
        }

        // Optionally reject scalar results (strings, numbers, booleans, null).
        if ($only_array_or_object_results_are_valid === true) {
            if (!\is_object($decoded) && !\is_array($decoded)) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3424
3425
    /**
3426
     * Returns true if the string contains only lowercase chars, false otherwise.
     *
     * @param string $str <p>The input string.</p>
3427
     *
3428
     * @return bool
3429
     *              <p>Whether or not $str contains only lowercase chars.</p>
3430
     */
3431 8
    public static function is_lowercase(string $str): bool
    {
        // Zero or more lowercase chars, nothing else.
        $pattern = '^[[:lower:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3440
3441
    /**
3442
     * Returns true if the string is serialized, false otherwise.
3443
     *
3444
     * @param string $str <p>The input string.</p>
3445
     *
3446
     * @return bool
3447
     *              <p>Whether or not $str is serialized.</p>
3448
     */
3449 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // unserialize() returns false both on failure and for a serialized
        // boolean false, so that one payload is recognised up front.
        if ($str === 'b:0;') {
            return true;
        }

        // Only the success of the parse matters here, never the resulting value.
        // Disallowing classes keeps arbitrary input from instantiating objects
        // (and running their magic methods); serialized objects are still
        // reported as serialized because they decode to __PHP_Incomplete_Class.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3461
3462
    /**
3463
     * Returns true if the string contains only upper case chars, false
3464
     * otherwise.
3465
     *
3466
     * @param string $str <p>The input string.</p>
3467
     *
3468
     * @return bool
3469
     *              <p>Whether or not $str contains only upper case characters.</p>
3470
     */
3471 8
    public static function is_uppercase(string $str): bool
    {
        // Zero or more uppercase chars, nothing else.
        $pattern = '^[[:upper:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3480
3481
    /**
3482
     * Check if the string is UTF-16.
3483
     *
3484
     * @param mixed $str                       <p>The input string.</p>
3485
     * @param bool  $check_if_string_is_binary
3486
     *
3487
     * @return false|int
3488
     *                   <strong>false</strong> if it's not UTF-16,<br>
3489
     *                   <strong>1</strong> for UTF-16LE,<br>
3490
     *                   <strong>2</strong> for UTF-16BE
3491
     */
3492 22
    public static function is_utf16($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        // Unless the caller opts out, input that is_binary() does not report
        // as binary is rejected right away.
        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Computed lazily and shared between both byte-order probes.
        $str_chars = [];

        // Per byte order: how many chars of the converted text re-appear in $str_chars.
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // Round trip: the conversion must be stable to count as a candidate.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys (the chars) are needed, so iterate them by value;
            // a function result is not a valid target for by-reference iteration.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        // A tie (including "no hits at all") means the byte order is undecidable.
        if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
3561
3562
    /**
3563
     * Check if the string is UTF-32.
3564
     *
3565
     * @param mixed $str                       <p>The input string.</p>
3566
     * @param bool  $check_if_string_is_binary
3567
     *
3568
     * @return false|int
3569
     *                   <strong>false</strong> if it's not UTF-32,<br>
3570
     *                   <strong>1</strong> for UTF-32LE,<br>
3571
     *                   <strong>2</strong> for UTF-32BE
3572
     */
3573 20
    public static function is_utf32($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        // Unless the caller opts out, input that is_binary() does not report
        // as binary is rejected right away.
        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Computed lazily and shared between both byte-order probes.
        $str_chars = [];

        // Per byte order: how many chars of the converted text re-appear in $str_chars.
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // Round trip: the conversion must be stable to count as a candidate.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys (the chars) are needed, so iterate them by value;
            // a function result is not a valid target for by-reference iteration.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        // A tie (including "no hits at all") means the byte order is undecidable.
        if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
3642
3643
    /**
3644
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
3645
     *
3646
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
3647
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3648
     *
3649
     * @return bool
3650
     */
3651 82
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Arrays are valid only if every element is (checked recursively).
            // Elements are only read, so iterate by value rather than by reference.
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Scalars and null are cast to string before the actual check.
        return self::is_utf8_string((string) $str, $strict);
    }
3665
3666
    /**
3667
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3668
     * Decodes a JSON string
3669
     *
3670
     * @see http://php.net/manual/en/function.json-decode.php
3671
     *
3672
     * @param string $json    <p>
3673
     *                        The <i>json</i> string being decoded.
3674
     *                        </p>
3675
     *                        <p>
3676
     *                        This function only works with UTF-8 encoded strings.
3677
     *                        </p>
3678
     *                        <p>PHP implements a superset of
3679
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3680
     *                        only supports these values when they are nested inside an array or an object.
3681
     *                        </p>
3682
     * @param bool   $assoc   [optional] <p>
3683
     *                        When <b>TRUE</b>, returned objects will be converted into
3684
     *                        associative arrays.
3685
     *                        </p>
3686
     * @param int    $depth   [optional] <p>
3687
     *                        User specified recursion depth.
3688
     *                        </p>
3689
     * @param int    $options [optional] <p>
3690
     *                        Bitmask of JSON decode options. Currently only
3691
     *                        <b>JSON_BIGINT_AS_STRING</b>
3692
     *                        is supported (default is to cast large integers as floats)
3693
     *                        </p>
3694
     *
3695
     * @return mixed
3696
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
3697
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
3698
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
3699
     *               is deeper than the recursion limit.
3700
     */
3701 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is passed through self::filter() before decoding.
        $filtered_json = self::filter($json);

        $json_available = self::$SUPPORT['json'] !== false;
        if (!$json_available) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filtered_json, $assoc, $depth, $options);
    }
3716
3717
    /**
3718
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3719
     * Returns the JSON representation of a value.
3720
     *
3721
     * @see http://php.net/manual/en/function.json-encode.php
3722
     *
3723
     * @param mixed $value   <p>
3724
     *                       The <i>value</i> being encoded. Can be any type except
3725
     *                       a resource.
3726
     *                       </p>
3727
     *                       <p>
3728
     *                       All string data must be UTF-8 encoded.
3729
     *                       </p>
3730
     *                       <p>PHP implements a superset of
3731
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3732
     *                       only supports these values when they are nested inside an array or an object.
3733
     *                       </p>
3734
     * @param int   $options [optional] <p>
3735
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3736
     *                       <b>JSON_HEX_TAG</b>,
3737
     *                       <b>JSON_HEX_AMP</b>,
3738
     *                       <b>JSON_HEX_APOS</b>,
3739
     *                       <b>JSON_NUMERIC_CHECK</b>,
3740
     *                       <b>JSON_PRETTY_PRINT</b>,
3741
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
3742
     *                       <b>JSON_FORCE_OBJECT</b>,
3743
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3744
     *                       constants is described on
3745
     *                       the JSON constants page.
3746
     *                       </p>
3747
     * @param int   $depth   [optional] <p>
3748
     *                       Set the maximum depth. Must be greater than zero.
3749
     *                       </p>
3750
     *
3751
     * @return false|string
3752
     *                      A JSON encoded <strong>string</strong> on success or<br>
3753
     *                      <strong>FALSE</strong> on failure
3754
     */
3755 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is passed through self::filter() before encoding.
        $filtered_value = self::filter($value);

        $json_available = self::$SUPPORT['json'] !== false;
        if (!$json_available) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filtered_value, $options, $depth);
    }
3766
3767
    /**
3768
     * Checks whether JSON is available on the server.
3769
     *
3770
     * @return bool
3771
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3772
     */
3773
    public static function json_loaded(): bool
    {
        // The presence of json_decode() is used as the marker for JSON support.
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
3777
3778
    /**
3779
     * Makes string's first char lowercase.
3780
     *
3781
     * @param string      $str                           <p>The input string</p>
3782
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
3783
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
3784
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
3785
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
3786
     *
3787
     * @return string the resulting string
3788
     */
3789 46
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // Language-specific rules or length preservation need self::strtolower().
        $needs_special_handling = ($lang !== null || $try_to_keep_the_string_length !== false);

        if ($encoding !== 'UTF-8') {
            // Non-default charset: use the encoding-aware helpers of this class.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $first = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $first . $rest;
        }

        // UTF-8: split off the first char with the native mb_* functions.
        $rest = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        if ($needs_special_handling) {
            $first = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        } else {
            $first = \mb_strtolower($first_char);
        }

        return $first . $rest;
    }
3834
3835
    /**
3836
     * alias for "UTF8::lcfirst()"
3837
     *
3838
     * @param string      $str
3839
     * @param string      $encoding
3840
     * @param bool        $clean_utf8
3841
     * @param string|null $lang
3842
     * @param bool        $try_to_keep_the_string_length
3843
     *
3844
     * @return string
3845
     *
3846
     * @see UTF8::lcfirst()
3847
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
3848
     */
3849 2
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: every argument is forwarded unchanged to lcfirst().
        $lowercased = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $lowercased;
    }
3864
3865
    /**
3866
     * Lowercase for all words in the string.
3867
     *
3868
     * @param string      $str                           <p>The input string.</p>
3869
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
3870
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do not start
3871
     *                                                   a new word.</p>
3872
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
3873
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
3874
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
3875
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
3876
     *
3877
     * @return string
3878
     */
3879 2
    public static function lcwords(
3880
        string $str,
3881
        array $exceptions = [],
3882
        string $char_list = '',
3883
        string $encoding = 'UTF-8',
3884
        bool $clean_utf8 = false,
3885
        string $lang = null,
3886
        bool $try_to_keep_the_string_length = false
3887
    ): string {
3888 2
        if (!$str) {
3889 2
            return '';
3890
        }
3891
3892 2
        $words = self::str_to_words($str, $char_list);
3893 2
        $use_exceptions = $exceptions !== [];
3894
3895 2
        $words_str = '';
3896 2
        foreach ($words as &$word) {
3897 2
            if (!$word) {
3898 2
                continue;
3899
            }
3900
3901
            if (
3902 2
                $use_exceptions === false
3903
                ||
3904 2
                !\in_array($word, $exceptions, true)
3905
            ) {
3906 2
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
3907
            } else {
3908 2
                $words_str .= $word;
3909
            }
3910
        }
3911
3912 2
        return $words_str;
3913
    }
3914
3915
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::lcfirst()".
     *
     * @param string      $str                           <p>Passed through to lcfirst().</p>
     * @param string      $encoding                      [optional] <p>Passed through to lcfirst().</p>
     * @param bool        $clean_utf8                    [optional] <p>Passed through to lcfirst().</p>
     * @param string|null $lang                          [optional] <p>Passed through to lcfirst().</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>Passed through to lcfirst().</p>
     *
     * @return string
     *                <p>Whatever "UTF8::lcfirst()" returns for the same arguments.</p>
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
3944
3945
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * With mbstring support the replacement is done via "mb_ereg_replace()",
     * otherwise via "UTF8::regex_replace()".
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string
     *                           means "strip whitespace" (the "\s" class).</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // explicit comparison instead of a truthiness check, so that a char list of "0" is honoured
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            // mb_ereg patterns have no delimiter, hence preg_quote() without a second argument
            /** @noinspection PregQuoteUsageInspection */
            $pattern = $has_chars ? '^[' . \preg_quote($chars) . ']+' : '^[\\s]+';

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        $pattern = $has_chars ? '^[' . \preg_quote($chars, '/') . ']+' : '^[\\s]+';

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3981
3982
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is joined into one string first; the code points are
     * then collected via "UTF8::codepoints()" and the largest one is converted
     * back into a character via "UTF8::chr()".
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, null if no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return $codepoints === [] ? null : self::chr(\max($codepoints));
    }
4004
4005
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int
     *             <p>The largest value reported by "UTF8::chr_size_list()", or 0 if that list is empty.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        // nothing to measure
        if ($sizes === []) {
            return 0;
        }

        return (int) \max($sizes);
    }
4023
4024
    /**
     * Checks whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4034
4035
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is joined into one string first; the code points are
     * then collected via "UTF8::codepoints()" and the smallest one is converted
     * back into a character via "UTF8::chr()".
     *
     * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the lowest code point, null if no code points were found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return $codepoints === [] ? null : self::chr(\min($codepoints));
    }
4057
4058
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::normalize_encoding()".
     *
     * @param mixed $encoding <p>Passed through to normalize_encoding().</p>
     * @param mixed $fallback <p>Passed through to normalize_encoding().</p>
     *
     * @return mixed
     *               <p>Whatever "UTF8::normalize_encoding()" returns for the same arguments.</p>
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4073
4074
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order:
     * 1. empty input ('' / '0') and '1' => $fallback
     * 2. a handful of well-known spellings (UTF8, BINARY, HTML, ISO, ...)
     * 3. the per-request static cache
     * 4. an exact match in the encodings list loaded via getData('encodings')
     * 5. the upper-cased name, mapped through the alias table below when its
     *    alphanumeric-only form is listed there
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $name = (string) $encoding;

        // '' and '0' are falsy; '1' / '0' are only a fallback, for non "strict_types" usage ...
        if (!$name || $name === '1') {
            return $fallback;
        }

        // spellings that are answered directly, without cache or encodings-list lookup
        $well_known = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($well_known[$name])) {
            return $well_known[$name];
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$name])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$name];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($name, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$name] = $name;

            return $name;
        }

        $upper_name = \strtoupper($name);
        // alias lookup key: upper-cased name reduced to [a-zA-Z0-9]
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upper_name);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5',
            'ISO88596'    => 'ISO-8859-6', // Greek
            'ISO88597'    => 'ISO-8859-7',
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown names are returned upper-cased
        $normalized = empty($equivalences[$alias_key]) ? $upper_name : $equivalences[$alias_key];

        // cache under the spelling the caller used
        $STATIC_NORMALIZE_ENCODING_CACHE[$name] = $normalized;

        return $normalized;
    }
4225
4226
    /**
     * Standardize line endings: every "\r\n", "\r" and "\n" is replaced by $replacer.
     *
     * The replacement is done in a single pass (longest match first), so the
     * inserted $replacer is never scanned again. This makes "\r\n" a safe
     * replacer: "a\nb" becomes "a\r\nb" and an existing "\r\n" stays "\r\n".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL here.</p>
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // backward compatibility: a per-pattern replacement list keeps the str_replace() semantics
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // strtr() with an array matches the longest key first and does not re-scan replaced text
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4239
4240
    /**
     * Normalize some MS Word special characters.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4252
4253
    /**
     * Normalize the whitespace.
     *
     * Thin wrapper: all arguments are handed to "ASCII::normalize_whitespace()".
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        return ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);
    }
4275
4276
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: static result cache, the table loaded via getData('ord'),
     * "IntlChar::ord()" (if supported), and finally manual decoding of the
     * first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $char = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // results are cached per (character, encoding) pair
        $cache_key = $char . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        // lazy-load the lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$char])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$char];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($char);
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based array of the first (max. four) byte values
        $bytes = \unpack('C*', (string) \substr($char, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode): use the first byte value as-is
            $code = $lead;
        }

        return $CHAR_CACHE[$cache_key] = $code;
    }
4361
4362
    /**
     * Parses the string into an array (into the the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * Uses "mb_parse_str()" when mbstring support is flagged, "parse_str()" otherwise.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        // success requires both: no parser failure and a non-empty result
        return $parsed !== false && $result !== [];
    }
4394
4395
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty
     * string; warnings are silenced on purpose.
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4409
4410
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both bounds are resolved to code points: as decimal numbers if both are
     * all-digits, as hexadecimal numbers if both are all-hex-digits (ctype mode),
     * via "is_numeric()" (non-ctype mode), or otherwise via "UTF8::ord()".
     * An empty array is returned if a bound is falsy or resolves to 0.
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is not available
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // NOTE: ctype_*() is always fed a string; a raw integer would be interpreted
        //       as an ASCII code (and non-string arguments are deprecated since PHP 8.1)
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // the end bound is interpreted the same way as the start bound
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4498
4499
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * One decode pass is: to_utf8() -> html_entity_decode() -> rawurldecode() -> fix_simple_utf8().
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (repeat the pass until the string stops changing).</p>
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // nothing that could be encoded => only the simple UTF-8 fix is applied
        $has_encoded_marker = \strpos($str, '&') !== false
                              ||
                              \strpos($str, '%') !== false
                              ||
                              \strpos($str, '+') !== false
                              ||
                              \strpos($str, '\u') !== false;
        if (!$has_encoded_marker) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // always one pass; further passes only in multi-decode mode while the string still changes
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
4571
4572
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The final expression is built as: delimiter + pattern + delimiter + "u" + options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used; "msr" is mapped to "ms".</p>
     * @param string $delimiter   [optional] <p>Delimiter the the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $expression = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($expression, $replacement, $str);
    }
4605
4606
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::remove_bom()".
     *
     * @param string $str <p>Passed through to remove_bom().</p>
     *
     * @return string
     *                <p>Whatever "UTF8::remove_bom()" returns for the same argument.</p>
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
4620
4621
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table (BOM => byte length) is checked in order;
     * whenever the string starts with a BOM, that many bytes are cut off the front.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strpos($str, $bom_string, 0) !== 0) {
                continue;
            }

            /** @var false|string $remainder - needed for PhpStan (stubs error) */
            $remainder = \substr($str, (int) $bom_byte_length);
            if ($remainder === false) {
                // nothing is left after the BOM
                return '';
            }

            $str = (string) $remainder;
        }

        return $str;
    }
4652
4653
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "   " for $what = " ") is
     * collapsed into a single occurrence. The search string is treated
     * literally, both in the pattern and in the replacement.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $items = \is_string($what) ? [$what] : $what;

        // anything that is neither a string nor an array leaves the input untouched
        if (!\is_array($items)) {
            return $str;
        }

        foreach ($items as $item) {
            $item = (string) $item;
            if ($item === '') {
                // an empty needle cannot be duplicated
                continue;
            }

            // '$1' re-inserts the captured text verbatim; using $item itself as the
            // replacement would let "$0" / "\1" inside it be parsed as back-references
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
4679
4680
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which should
     *                               not be stripped. Default: '' (strip every tag)
     *                               </p>
     *
     * @return string
     *                <p>A string with without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
4695
4696
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "#" is the delimiter; "\r\n" must come first in the alternation so that CRLF is matched as a unit
        // (flags: i = case-insensitive "<BR>", s = dot matches newlines, U = ungreedy so "<br ...>" stops at the first ">")
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4709
4710
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Thin wrapper: all arguments are handed to "ASCII::remove_invisible_characters()".
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        return ASCII::remove_invisible_characters($str, $url_encoded, $replacement);
    }
4740
4741
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix check itself is byte-based ("strpos() === 0"), only the cut
     * is done with the multi-byte functions of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare with '' explicitly: a prefix like "0" is falsy in PHP but still a valid prefix
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, (int) \mb_strlen($substring));
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4776
4777
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix check itself is byte-based, only the cut is done with the
     * multi-byte functions of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare with '' explicitly: a suffix like "0" is falsy in PHP but still a valid suffix
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4813
4814
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // the case-insensitive variant is handled by the multi-byte aware helper
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4837
4838
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or the strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // the case-insensitive variant is handled by the multi-byte aware helper
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4861
4862
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" accepts only "none" | "long" | "entity" or a code point,
            // never an arbitrary string: so invalid bytes are either dropped ("none") or mapped to
            // U+FFFD, which is swapped for the real replacement by the "str_replace()" below
            $substitute_character = $replacement_char === '' ? 'none' : 0xFFFD;

            $save = \mb_substitute_character();
            \mb_substitute_character($substitute_character);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global setting, even if the conversion fails
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
4912
4913
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped, null or '' means "whitespace".</p>
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // do not use "if ($chars)" here: the char-list "0" is falsy but still a valid char-list
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '[' . \preg_quote($chars) . ']+$';
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_chars) {
            // the fallback uses "/" as delimiter, so it needs to be quoted too
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4950
4951
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Every entry of the internal support-array is printed as "key - value"
     * line, wrapped in a "<pre>" block.
     *
     * @return void
     */
    public static function showSupport()
    {
        $output = '<pre>';

        foreach (self::$SUPPORT as $key => $value) {
            $output .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        echo $output . '</pre>';
    }
4965
4966
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // the ascii-check is only done if the caller asked for it
        $return_unchanged = $keep_ascii_chars === true && ASCII::is_ascii($char) === true;
        if ($return_unchanged) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
4995
4996
    /**
     * Replace every run of $tab_length spaces by one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that represent one tab. Default: 4</p>
     *
     * @return string
     *                <p>The string with tabs instead of the space-runs.</p>
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5014
5015
    /**
     * alias for "UTF8::str_split()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $clean_utf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5034
5035
    /**
     * alias for "UTF8::str_starts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5050
5051
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);

        // remove leading dashes and underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper()" is only used if no language-special-case was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        /**
         * Shared uppercase helper for both callbacks below.
         *
         * @param string $part
         * @param bool   $clean
         *
         * @return string
         */
        $to_upper = static function (string $part, bool $clean) use (
            $use_mb_functions,
            $encoding,
            $lang,
            $try_to_keep_the_string_length
        ): string {
            if ($use_mb_functions !== true) {
                return self::strtoupper($part, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($part);
            }

            return \mb_strtoupper($part, $encoding);
        };

        // 1.) drop the separators and uppercase the char right after them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                if (!isset($match[1])) {
                    // separators at the end of the string: nothing to uppercase
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        // 2.) uppercase the char right after a number (the number itself is kept)
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5136
5137
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the helper is applied for the
     * space-separated parts and afterwards for the dash-separated parts.
     *
     * @param string $str
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $capitalized_by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalized_by_space, '-');
    }
5156
5157
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // byte-based search for the case-sensitive check, multi-byte search otherwise
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5179
5180
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * INFO: an empty $haystack or an empty $needles array always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // exactly one search per needle: the case-sensitive check must not
            // fall through into an additional case-insensitive search
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5216
5217
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * INFO: empty needles are skipped, an empty $haystack or an empty $needles
     *       array always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $str contains $needle
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // do not use "!$needle" here: the needle "0" is falsy but still searchable
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5259
5260
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * INFO: this is "UTF8::str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5274
5275
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $str = \trim($str);

        // 1.) put a dash in front of every uppercase char that is not at a word boundary
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $str);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $str);
        }

        // 2.) lowercase the string (the delimiter is inserted afterwards, so it keeps its case)
        $plain_lowercase = $lang === null
                           && $try_to_keep_the_string_length === false
                           && $encoding === 'UTF-8';
        if ($plain_lowercase) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // 3.) replace every run of dashes, underscores and whitespace by the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5326
5327
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks are done in this order: binary (UTF-32 / UTF-16), ASCII, UTF-8,
     * "mb_detect_encoding()" (only with native mbstring support) and at last a
     * round-trip via "iconv()" for every known encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            $utf32_type = self::is_utf32($input, false);
            if ($utf32_type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_type === 2) {
                return 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($input, false);
            if ($utf16_type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_type === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $detection_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $detection_order, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            // the encoding list is loaded only once, on first use
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);

            // the first encoding that round-trips the input unchanged wins
            if ($round_trip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5448
5449
    /**
     * alias for "UTF8::str_ends_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
5464
5465
    /**
     * Check if the string ends with the given substring.
     *
     * INFO: the comparison is byte-based and case-sensitive, an empty $needle always matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needle_length = \strlen($needle);

        if ($needle_length === 0) {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -$needle_length);

        return $tail === $needle;
    }
5485
5486
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * INFO: an empty substring does not match a non-empty $str here
     *       (in contrast to "UTF8::str_ends_with()").
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with $substring
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));

            if ($tail === $candidate) {
                return true;
            }
        }

        // no match at all (this is also the result for an empty $substrings array)
        return false;
    }
5510
5511
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $has_prefix = $substring !== '' && \strpos($str, $substring) === 0;

        // INFO: an empty $substring is simply prepended, which leaves the string unchanged
        return $has_prefix ? $str : $substring . $str;
    }
5532
5533
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $has_suffix = $str !== ''
                      && $substring !== ''
                      && \substr($str, -\strlen($substring)) === $substring;

        if ($has_suffix) {
            return $str;
        }

        // INFO: an empty $substring is simply appended, which leaves the string unchanged
        return $str . $substring;
    }
5555
5556
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * INFO: "_id" is removed wherever it occurs in the string, not only at the end.
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // the order matters: "_id" needs to be removed before the remaining underscores are replaced
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($with_spaces));
    }
5580
5581
    /**
     * Deprecated alias that forwards to "UTF8::str_istarts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
5596
5597
    /**
     * Deprecated alias that forwards to "UTF8::str_iends_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
5612
5613
    /**
     * Tells whether $haystack ends with $needle, ignoring case.
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle. The tail of $haystack is cut by the byte length of
     * $needle and compared via UTF8::strcasecmp().
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            // The empty needle wins, even against an empty haystack.
            return $needle === '';
        }

        $needle_bytes = \strlen($needle);
        $haystack_tail = \substr($haystack, -$needle_bytes);

        return self::strcasecmp($haystack_tail, $needle) === 0;
    }
5633
5634
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $substrings list yields false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // Iterate by value: nothing is modified, so a reference would only
        // force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5659
5660
    /**
     * Deprecated wrapper: forwards all arguments unchanged to UTF8::stripos()
     * and hands back its result.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $first_index = self::stripos($str, $needle, $offset, $encoding);

        return $first_index;
    }
5689
5690
    /**
     * Deprecated wrapper: forwards all arguments unchanged to UTF8::strripos()
     * and hands back its result.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $last_index = self::strripos($str, $needle, $offset, $encoding);

        return $last_index;
    }
5720
5721
    /**
     * Deprecated wrapper: forwards all arguments unchanged to UTF8::strpos()
     * and hands back its result.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $first_index = self::strpos($str, $needle, $offset, $encoding);

        return $first_index;
    }
5750
5751
    /**
     * Deprecated wrapper: forwards all arguments unchanged to UTF8::strrpos()
     * and hands back its result.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $last_index = self::strrpos($str, $needle, $offset, $encoding);

        return $last_index;
    }
5781
5782
    /**
     * Splices $substring into $str at character position $index.
     *
     * When $index is greater than the character length of $str the input is
     * returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: go through the encoding-aware helpers of this class.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8 path: call mbstring directly.
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5821
5822
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Each search term is quoted and turned into a "/…/ui" pattern; an empty
     * search term becomes a pattern that can never match. The replacement is
     * always inserted literally: "$" and "\" are escaped so that sequences
     * such as "$1" or "\0" are NOT interpreted as regex back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string, or an array of replacement
     *                       strings paired with the search array.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Build one case-insensitive unicode pattern per search term.
        $patterns = [];
        foreach ((array) $search as $key => $term) {
            $term = (string) $term;
            $patterns[$key] = $term === ''
                ? '/^(?<=.)$/' // never matches: an empty needle must not replace anything
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // preg_replace() treats "$n", "${n}" and "\n" in the replacement as
        // back-references; escape them so the replacement stays literal text.
        $escape_replacement = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replacements = \is_array($replace)
            ? \array_map($escape_replacement, $replace)
            : $escape_replacement($replace);

        return \preg_replace($patterns, $replacements, $subject, -1, $count);
    }
5866
5867
    /**
     * Replaces $search at the beginning of the string with $replacement,
     * ignoring case (multibyte aware).
     *
     * Only the leading characters of $str are inspected. If they do not equal
     * $search case-insensitively, $str is returned unchanged.
     *
     * NOTE: with an empty $search the $replacement is appended to $str; this
     * historical behavior is kept for backward compatibility.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        // Compare character-wise so that non-ASCII letters are folded too and
        // exactly the matched characters (not a byte count) get removed.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $head = (string) \mb_substr($str, 0, $search_length, 'UTF-8');

        if (\mb_strtolower($head, 'UTF-8') === \mb_strtolower($search, 'UTF-8')) {
            return $replacement . \mb_substr($str, $search_length, null, 'UTF-8');
        }

        return $str;
    }
5898
5899
    /**
     * Replaces $search at the ending of the string with $replacement,
     * ignoring case (multibyte aware).
     *
     * Only the trailing characters of $str are inspected. If they do not equal
     * $search case-insensitively, or $search is longer than $str, the input
     * is returned unchanged.
     *
     * With an empty $search the $replacement is simply appended.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>string after the replacements.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $keep_length = (int) \mb_strlen($str, 'UTF-8') - $search_length;

        // A needle longer than the haystack can never match; bail out before
        // computing an offset that would lie outside of the string.
        if ($keep_length < 0) {
            return $str;
        }

        $tail = (string) \mb_substr($str, $keep_length, null, 'UTF-8');

        if (\mb_strtolower($tail, 'UTF-8') === \mb_strtolower($search, 'UTF-8')) {
            return \mb_substr($str, 0, $keep_length, 'UTF-8') . $replacement;
        }

        return $str;
    }
5931
5932
    /**
     * Tells whether $haystack starts with $needle, ignoring case.
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle. The actual check is "UTF8::stripos() returns 0".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            // The empty needle wins, even against an empty haystack.
            return $needle === '';
        }

        $first_position = self::stripos($haystack, $needle);

        return $first_position === 0;
    }
5952
5953
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty $substrings list yields false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: nothing is modified, so a reference would only
        // force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5981
5982
    /**
     * Gets the substring after the first occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // The offset must be computed in the same encoding that is used to
        // cut the string afterwards, otherwise the positions do not line up.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6019
6020
    /**
     * Gets the substring after the last occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // The offset must be computed in the same encoding that is used to
        // cut the string afterwards, otherwise the positions do not line up.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6057
6058
    /**
     * Gets the substring before the first occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr($str, 0, $offset);
        }

        // The offset must be computed in the same encoding that is used to
        // cut the string afterwards, otherwise the positions do not line up.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6087
6088
    /**
     * Gets everything in front of the last occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path: use the encoding-aware helpers of this class.
            $position = self::strripos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // UTF-8 path: call mbstring directly.
        $position = \mb_strripos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
6122
6123
    /**
     * Returns the part of $str produced by UTF8::stristr() for the first
     * occurrence of $needle: the portion after it, or the portion before it
     * when "$before_needle" is true.
     *
     * An empty $str, an empty $needle or a failed lookup all yield ''.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        // stristr() signals "not found" with false; map that to ''.
        return $found === false ? '' : $found;
    }
6159
6160
    /**
     * Returns the part of $str produced by UTF8::strrichr() for the last
     * occurrence of $needle: the portion after it, or the portion before it
     * when "$before_needle" is true.
     *
     * An empty $str, an empty $needle or a failed lookup all yield ''.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        // strrichr() signals "not found" with false; map that to ''.
        return $found === false ? '' : $found;
    }
6196
6197
    /**
     * Returns the trailing $n characters of the string.
     *
     * An empty $str or a non-positive $n yields ''.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path: use the encoding-aware helper of this class.
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        // UTF-8 path: a negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
6223
6224
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged.
     * Otherwise it is cut so that the kept part plus $str_add_on is $length
     * characters long. If $str_add_on alone is at least $length characters
     * long, nothing of $str is kept and only $str_add_on is returned.
     *
     * An empty $str or a non-positive $length yields ''.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp at 0: a negative length would make mb_substr() cut from
            // the END of the string and return far more than $length chars.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6261
6262
    /**
     * Limits the number of characters in a string without cutting a word in
     * half where possible.
     *
     * - A string that fits into $length is returned unchanged.
     * - If the character at position $length - 1 is a space, the string is
     *   cut right before that space.
     * - Otherwise the string is cut to $length characters and the last
     *   (possibly partial) space-separated word is dropped; if nothing is
     *   left, the first $length - 1 characters are used instead.
     * - In every shortened case $str_add_on is appended.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path: use the encoding-aware helpers of this class.
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return $str_add_on;
            }

            // Drop the last (possibly partial) word.
            $words = \explode(' ', $truncated);
            \array_pop($words);
            $whole_words = \implode(' ', $words);

            if ($whole_words === '') {
                return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
            }

            return $whole_words . $str_add_on;
        }

        // UTF-8 path: call mbstring directly.
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        if (\mb_substr($str, $length - 1, 1) === ' ') {
            return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
        }

        $truncated = \mb_substr($str, 0, $length);

        // Drop the last (possibly partial) word.
        $words = \explode(' ', $truncated);
        \array_pop($words);
        $whole_words = \implode(' ', $words);

        if ($whole_words === '') {
            return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
6328
6329
    /**
     * Returns the longest run of leading characters that $str1 and $str2
     * have in common (compared character by character).
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';
        $position = 0;

        if ($encoding !== 'UTF-8') {
            // Generic path: use the encoding-aware helpers of this class.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            while ($position < $limit) {
                $current = self::substr($str1, $position, 1, $encoding);

                // Stop at the first failed extraction or differing character.
                if (
                    $current === false
                    ||
                    $current !== self::substr($str2, $position, 1, $encoding)
                ) {
                    break;
                }

                $prefix .= $current;
                ++$position;
            }

            return $prefix;
        }

        // UTF-8 path: call mbstring directly.
        $limit = (int) \min(
            \mb_strlen($str1),
            \mb_strlen($str2)
        );

        while ($position < $limit) {
            $current = \mb_substr($str1, $position, 1);

            // Stop at the first failed extraction or differing character.
            if (
                $current === false
                ||
                $current !== \mb_substr($str2, $position, 1)
            ) {
                break;
            }

            $prefix .= $current;
            ++$position;
        }

        return $prefix;
    }
6390
6391
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * Uses the dynamic programming approach described at
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     * Only the previous and the current row of the table are kept, and the
     * characters of $str2 are extracted once up front.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The longest common substring, or '' if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated AFTER normalization on purpose: an alias that normalizes
        // to 'UTF-8' takes the direct mbstring path from here on.
        $use_mbstring = $encoding === 'UTF-8';

        // Split $str2 once instead of re-slicing it for every table cell.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $use_mbstring
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // position in $str1 right after the best match
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            // Invariant for the whole inner loop, so extract it only once.
            $str_char = $use_mbstring
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];
            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    // Strict ">" keeps the first match on ties.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mbstring) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6479
6480
    /**
     * Returns the longest run of characters shared by the ends of $str1 and $str2.
     *
     * Both strings are walked backwards, one character at a time, starting at
     * the last character; the walk stops at the first position that differs.
     *
     * @param string $str1     <p>First string for comparison.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The common suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // the suffix can never be longer than the shorter input
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            $offset = 1;
            while ($offset <= $limit) {
                $candidate = \mb_substr($str1, -$offset, 1);
                if ($candidate === false || $candidate !== \mb_substr($str2, -$offset, 1)) {
                    break;
                }

                $suffix = $candidate . $suffix;
                ++$offset;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $offset = 1;
        while ($offset <= $limit) {
            $candidate = self::substr($str1, -$offset, 1, $encoding);
            if ($candidate === false || $candidate !== self::substr($str2, -$offset, 1, $encoding)) {
                break;
            }

            $suffix = $candidate . $suffix;
            ++$offset;
        }

        return $suffix;
    }
6544
6545
    /**
     * Checks whether $str matches the supplied regular expression.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * modifier, so it has to be passed without delimiters or modifiers.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
6557
6558
    /**
     * Tells whether a character exists at the given offset.
     *
     * A negative offset counts backwards from the last character of the
     * string. Implements part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // forward offsets are zero-based, backward offsets start at -1
        return $offset < 0
            ? \abs($offset) <= $length
            : $offset < $length;
    }
6580
6581
    /**
     * Returns the character at the given index.
     *
     * A negative index counts backwards from the last character of the string.
     * Implements part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, so that the bounds check
        // below uses the same character count as the char_at() lookup.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6611
6612
    /**
     * Pad a string to a given length (counted in characters) with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty, or when $pad_length is smaller than the length of $str.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // anything that is not already an integer has to be one of the named directions
        if ($pad_type !== (int) $pad_type) {
            $named_types = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($named_types[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $named_types[$pad_type];
        }

        // UTF-8 goes straight to the mb_* functions, everything else through the class helpers
        if ($encoding === 'UTF-8') {
            $measure = static function (string $text): int {
                return (int) \mb_strlen($text);
            };
            $take = static function (string $text, int $chars): string {
                return (string) \mb_substr($text, 0, $chars);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $measure = static function (string $text) use ($encoding): int {
                return (int) self::strlen($text, $encoding);
            };
            $take = static function (string $text, int $chars) use ($encoding): string {
                return (string) self::substr($text, 0, $chars, $encoding);
            };
        }

        $str_length = $measure($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        $diff = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd remainder puts the extra character on the right-hand side
            $left_length = (int) \floor($diff / 2);
            $right_length = (int) \ceil($diff / 2);

            $pre = $take(\str_repeat($pad_string, $left_length), $left_length);
            $post = $take(\str_repeat($pad_string, $right_length), $right_length);

            return $pre . $str . $post;
        }

        // repeat the pad string often enough to cover the gap, then cut it to size
        $repetitions = (int) \ceil($diff / $measure($pad_string));
        $filler = $take(\str_repeat($pad_string, $repetitions), $diff);

        if ($pad_type === \STR_PAD_LEFT) {
            return $filler . $str;
        }

        // STR_PAD_RIGHT and every other integer value
        return $str . $filler;
    }
6775
6776
    /**
     * Pads both sides of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
6802
6803
    /**
     * Pads the beginning of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
6829
6830
    /**
     * Pads the end of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
6856
6857
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6882
6883
    /**
     * Replace all occurrences of the search string with the replacement string.
     *
     * INFO: This only forwards to the native "str_replace()", which is already UTF-8 safe.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed
     *               <p>A string or an array with the replaced values.</p>
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        return \str_replace($search, $replace, $subject, $count);
    }
6928
6929
    /**
     * Replaces $search with $replacement if $str starts with $search.
     *
     * An empty $search never matches as a prefix; in that case $replacement
     * is appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // covers every combination of empty $str / $replacement as well
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // byte-wise prefix comparison
        if (\strncmp($str, $search, $search_length) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $search_length);
    }
6964
6965
    /**
     * Replaces $search with $replacement if $str ends with $search.
     *
     * An empty $search never matches as a suffix; in that case $replacement
     * is appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // covers every combination of empty $str / $replacement as well
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A needle longer than the haystack can never be its suffix. Checking this
        // first avoids handing an out-of-range offset to the string functions,
        // which is a ValueError on PHP 8 and a warning on PHP 7.
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // byte-wise suffix comparison
        if (\substr($str, -$search_length) === $search) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
7000
7001
    /**
     * Replace the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "UTF8::strpos()" reports no match.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strpos($subject, $search);
        if ($pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $pos, $search_length);
    }
7033
7034
    /**
     * Replace the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "UTF8::strrpos()" reports no match.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strrpos($subject, $search);
        if ($pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $pos, $search_length);
    }
7065
7066
    /**
     * Shuffles all the characters in the string.
     *
     * The character positions are shuffled with "shuffle()" and the string is
     * rebuilt by reading one character per shuffled position.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $result = '';

        if ($encoding === 'UTF-8') {
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
7113
7114
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start or $end are counted
     * from the end of the string.
     *
     * When $end is given and does not lie behind $start (after negative
     * indexes have been resolved), an empty string is returned.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p><b>FALSE</b> is passed through
     *                      if the underlying substr call returns it.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // no end given: take everything from $start (the substr call resolves a negative $start)
            $length = $str_length;
        } else {
            // Resolve negative indexes to absolute positions before measuring the
            // distance; otherwise "$end - $start" is wrong for a negative $start
            // and can become negative for a negative $end.
            if ($start < 0) {
                $start = \max(0, $str_length + $start);
            }

            if ($end < 0) {
                $end += $str_length;
            }

            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7163
7164
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every upper-case
     * letter is lower-cased and prefixed with "_"; every ASCII digit is
     * surrounded by "_"; whitespace runs become "_"; repeated "_" are
     * collapsed and leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Numbers and upper-case letters. There is no "|" between the two
            // properties: inside a character class it is a literal pipe and
            // would make "|" itself a match.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // only ASCII digits survive the int round-trip unchanged
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // No separate whitespace-trim pattern is needed: once every whitespace run
        // is an underscore, the edges are cleaned up by the "_" trim below.
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert whitespace runs to "_"
                '/_+/',    // collapse repeated "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7229
7230
    /**
     * Sort all characters according to code points.
     *
     * The string is split via "UTF8::codepoints()", the code points are sorted
     * (keys preserved) and the result is rebuilt with "UTF8::string()".
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7256
7257
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is passed, every element is split recursively and the array is
     * returned with its original keys.
     *
     * @param int|int[]|string|string[] $str                     <p>The string to split into array.</p>
     * @param int                       $length                  [optional] <p>Max character length of each array
     *                                                           element. Values <= 0 result in an empty array.</p>
     * @param bool                      $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                           "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // write back by key instead of iterating by reference, so no dangling reference is left behind
            foreach ($str as $k => $v) {
                $str[$k] = self::str_split(
                    $v,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (
            $try_to_use_mb_functions === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            if (Bootup::is_php('7.4')) {
                // the native implementation already honours $length, so its result is returned as-is
                $return = \mb_str_split($str, $length);
                if ($return !== false) {
                    return $return;
                }
            }

            $i_max = \mb_strlen($str);
            if ($i_max <= 127) {
                // short input: pick the characters one by one
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer input: let PCRE split the string in a single pass
                $return_array = [];
                \preg_match_all('/./us', $str, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $str, $return_array);
            $ret = $return_array[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand,
            // bytes that do not form a 1-4 byte sequence matched below are skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a 2-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a 3-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a 4-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue the single characters together into chunks of $length characters
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // array_map() passes values, so the callback takes its argument by value
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7403
7404
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[]
     *                  <p>An array of strings; an empty array if $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $result = \mb_split($pattern, $str);

            // mb_split() reports failure with false, which must neither be iterated nor returned as "array"
            if ($result === false) {
                return [];
            }

            // the limit truncates the result, the remainder of the string is dropped
            if ($limit > 0) {
                return \array_slice($result, 0, $limit);
            }

            return $result;
        }

        // ask for one extra element, it collects the unsplit remainder and is removed again below
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): this fallback quotes $pattern and therefore matches it literally, while the
        // mb_split() branch above interprets it as a regular expression - confirm which is intended.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7466
7467
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty needle always matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // compare the leading bytes only, instead of searching the whole haystack for the needle
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7487
7488
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring; always false for an empty $str or an empty list
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // plain by-value iteration: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7516
7517
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The part behind the separator, or an empty string if the input or the separator is empty
     *                or the separator was not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // go through the class wrapper, like the other "str_substr_*_separator" methods do
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7556
7557
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The part behind the last separator, or an empty string if the input or the separator is
     *                empty or the separator was not found.</p>
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // UTF-8 goes straight to mbstring, every other encoding through the class wrappers
        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
        }

        $start = $position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
7596
7597
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The part in front of the first separator, or an empty string if the input or the
     *                separator is empty or the separator was not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $position = \mb_strpos($str, $separator);

            return $position === false ? '' : (string) \mb_substr($str, 0, $position);
        }

        $position = self::strpos($str, $separator, 0, $encoding);

        return $position === false ? '' : (string) self::substr($str, 0, $position, $encoding);
    }
7640
7641
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The part in front of the last separator, or an empty string if the input or the
     *                separator is empty or the separator was not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $position = \mb_strrpos($str, $separator);

            return $position === false ? '' : (string) \mb_substr($str, 0, $position);
        }

        // the lookup uses the encoding as given ...
        $position = self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        // ... while the cut is done with the normalized encoding name
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $position, $normalized_encoding);
    }
7683
7684
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without "$before_needle" the returned part starts with the needle itself.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The matching part, or an empty string if the input or the needle is empty or the needle
     *                was not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // "false" is the default of the third argument, so one call covers both cases
            $part = \mb_strstr($str, $needle, $before_needle);
        } else {
            $part = self::strstr(
                $str,
                $needle,
                $before_needle,
                $encoding
            );
        }

        return $part === false ? '' : $part;
    }
7728
7729
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Without "$before_needle" the returned part starts with the needle itself.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The matching part, or an empty string if the input or the needle is empty or the needle
     *                was not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // "false" is the default of the third argument, so one call covers both cases
            $part = \mb_strrchr($str, $needle, $before_needle);
        } else {
            $part = self::strrchr(
                $str,
                $needle,
                $before_needle,
                $encoding
            );
        }

        return $part === false ? '' : $part;
    }
7773
7774
    /**
     * Wraps $str into the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to put in front of and behind $str.</p>
     *
     * @return string
     *                <p>The input with the substring added on both sides.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7787
7788
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or null.
     *                                                           Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                           tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                           ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first === true) {
            $str = \trim($str);
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // the plain mbstring calls are only used if no language / length handling was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // explicit comparison instead of a truthiness check: "0" is a valid separator character
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is every run of characters that is neither whitespace nor one of the extra separator chars
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are kept exactly as they are (case-sensitive comparison)
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
7878
7879
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. These words are matched literally (they are escaped before
     * they are added to the internal list of "small words").
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments of the words that normally stay lower-case
        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        if ($ignore !== []) {
            // The caller passes plain words, not regex fragments: escape every ASCII character that is not
            // a letter, digit or underscore (this also covers whitespace, "#" and the "~" delimiter, which
            // all carry a meaning in the extended-mode patterns below), so that a word can neither break
            // nor alter those patterns.
            foreach ($ignore as $ignore_word) {
                $small_words[] = (string) \preg_replace(
                    '/[^A-Za-z0-9_\x80-\xFF]/',
                    '\\\\$0',
                    (string) $ignore_word
                );
            }
        }

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case character is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in the pattern below rejects a preceding "…" (U+2026), although the
        // inline pattern comment talks about a hyphen - confirm which one is intended.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
8057
8058
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big-endian number and written
     * with the digits "0" and "1", without leading zeros ("0" for an empty
     * string or a string of NUL bytes). The conversion is done byte by byte,
     * so it stays exact for inputs of any length.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>The binary digits. "false" stays in the signature for backward
     *                      compatibility, this implementation does not return it.</p>
     */
    public static function str_to_binary(string $str)
    {
        $bits = '';
        $length = \strlen($str);

        // eight binary digits per byte, zero-padded so that the bytes can simply be concatenated
        for ($i = 0; $i < $length; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // a number has no leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8077
8078
    /**
     * Split a string into its lines.
     *
     * "\r\n", "\r" and "\n" each count as exactly one line break, so empty
     * lines are kept unless $remove_empty_values is set.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values === true ? [] : [''];
        }

        // "\r\n" has to come first, otherwise it would be split into two line breaks
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split("\r\n|\r|\n", $str);
        } else {
            $return = \preg_split("/\r\n|\r|\n/u", $str);
        }

        // both split functions report failure with false
        if ($return === false) {
            return $remove_empty_values === true ? [] : [''];
        }

        if (
            $remove_short_values === null
            &&
            $remove_empty_values === false
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );
    }
8116
8117
    /**
     * Convert a string into an array of words.
     *
     * The string is split with the words themselves as captured delimiters, so
     * the result holds the words as well as the text between them.
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        // nothing to filter: hand the raw split result back
        if ($remove_empty_values === false && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining entry is a string (keys are kept)
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
8164
8165
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::to_ascii()".
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8184
8185
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone already uses up the whole desired length, only the
     * substring is returned (same behavior as "UTF8::str_truncate_safe()").
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // fast path: plain mbstring calls for the default encoding
        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);

                // a non-positive length would make mb_substr() cut from the *end* of the string,
                // producing a result that is longer than requested
                if ($length <= 0) {
                    return $substring;
                }

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);

            // see the note in the UTF-8 branch
            if ($length <= 0) {
                return $substring;
            }
        }

        return ((string) self::substr($str, 0, $length, $encoding)) . $substring;
    }
8243
8244
    /**
     * Truncates the string to a given length without cutting a word in half.
     * If $substring is provided, and truncating occurs, the string is further
     * truncated so that the substring may be appended without exceeding the
     * desired length.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit. Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // reserve room for the substring that gets appended at the end
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '';
            }

            // a space exactly at index $length means the cut falls on a word boundary
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space !== $length) {
                // the last word was cut: go back to the last space inside the truncated part
                $last_space = self::strrpos($head, ' ', 0, $encoding);
                $drop_single_word = $next_space !== false && $ignore_do_not_split_words_for_one_word === false;

                if ($last_space !== false || $drop_single_word) {
                    $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
                }
            }

            return $head . $substring;
        }

        // fast path: plain mbstring calls for the default encoding
        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve room for the substring that gets appended at the end
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // a space exactly at index $length means the cut falls on a word boundary
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space !== $length) {
            // the last word was cut: go back to the last space inside the truncated part
            $last_space = \mb_strrpos($head, ' ', 0);
            $drop_single_word = $next_space !== false && $ignore_do_not_split_words_for_one_word === false;

            if ($last_space !== false || $drop_single_word) {
                $head = (string) \mb_substr($head, 0, (int) $last_space);
            }
        }

        return $head . $substring;
    }
8339
8340
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: thin wrapper around "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8355
8356
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: implemented as "UTF8::str_camelize()" followed by "UTF8::ucfirst()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8379
8380
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::ucfirst()".
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
8409
8410
    /**
     * Get the number of words in a specific string.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the words are read from the odd indexes, the text between them sits on the even ones
        $chunks = self::str_to_words($str, $char_list);
        $chunk_count = \count($chunks);

        // every format other than 1 and 2: just count the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($chunk_count - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $chunk_count; $index += 2) {
                $words[] = $chunks[$index];
            }

            return $words;
        }

        // format 2: key every word by its character offset in the input string
        $position = (int) self::strlen($chunks[0]);
        for ($index = 1; $index < $chunk_count; $index += 2) {
            $words[$position] = $chunks[$index];
            $position += (int) self::strlen($chunks[$index]) + (int) self::strlen($chunks[$index + 1]);
        }

        return $words;
    }
8447
8448
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are case-folded
     *       via UTF8::strtocasefold() and then compared with UTF8::strcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8486
8487
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::strstr()".
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
8516
8517
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized to NFD before the byte-wise comparison, so
     * canonically equivalent strings (e.g. precomposed vs. decomposed accents)
     * compare as equal. A string that cannot be normalized (e.g. invalid UTF-8)
     * is compared as-is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false on failure; with strict_types enabled,
        // passing that false on to \strcmp() would throw a TypeError
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8539
8540
    /**
     * Find length of initial segment not matching mask.
     *
     * Counts the characters at the start of $str (optionally restricted to the
     * slice given by $offset / $length) up to the first character that is
     * contained in $char_list.
     *
     * @param string $str
     * @param string $char_list
     * @param int    $offset
     * @param int    $length
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // an empty mask can never match: the whole string is the initial segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // restrict the search to the requested slice of the string
        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length === null) {
                $slice = \mb_substr($str, (int) $offset);
            } else {
                $slice = \mb_substr($str, (int) $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char of the mask found (or the match failed): the whole string counts
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
8604
8605
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::stristr()".
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
8634
8635
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every entry is cast to int, written as a numeric HTML entity ("&#<int>;")
     * and the joined entities are decoded via UTF8::html_entity_decode().
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string(array $array): string
    {
        if ($array === []) {
            return '';
        }

        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $array
            )
        );

        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
8658
8659
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is compared against every key of the internal BOM table.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the keys (the BOM byte sequences) are needed; iterating by value avoids
        // binding a reference to the shared static table
        foreach (\array_keys(self::$BOM) as $bom_string) {
            // array keys may come back as int for numeric strings, strpos() needs a string
            if (\strpos($str, (string) $bom_string) === 0) {
                return true;
            }
        }

        return false;
    }
8679
8680
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               You can use the optional second parameter to specify tags which should
     *                               not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // the optional cleanup runs before the tags are stripped
        $input = $clean_utf8 === true ? self::clean($str) : $str;

        // only hand over the second argument if the caller provided one
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8720
8721
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $without_whitespace = \preg_replace('/[[:space:]]+/u', '', $str);

        // preg_replace() yields null on failure, the cast turns that into an empty string
        return (string) $without_whitespace;
    }
8738
8739
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * Tries mbstring first, then intl ("grapheme_stripos()"), then the native
     * "stripos()" for pure ASCII input and finally a case-folded UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset, self::normalize_encoding($encoding, 'UTF-8'));
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                       &&
                       $offset >= 0 // grapheme_stripos() can't handle negative offset
                       &&
                       self::$SUPPORT['intl'] === true;

        if ($intl_usable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
8814
8815
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * The search is case-insensitive. Tries mbstring first, then intl
     * ("grapheme_stristr()"), then the native "stristr()" for pure ASCII input
     * and finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the cleanup may have emptied the needle; compare strictly, because a needle
        // like "0" is falsy in PHP but still a valid string to search for
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // mbstring is not available at this point, so other encodings cannot be handled properly
        if ($encoding !== 'UTF-8') {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the needle
        //

        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the part in front of the needle and return the rest
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8899
8900
    /**
     * Get the string length, not the byte-length!
     *
     * Uses the first usable strategy: mbstring, plain byte count (CP850 / ASCII),
     * iconv, intl, plain byte count for ASCII-only input, and finally counting
     * the characters with a regex.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        $no_converter_available = self::$SUPPORT['mbstring'] === false
                                  &&
                                  self::$SUPPORT['iconv'] === false;

        if ($encoding !== 'UTF-8' && $no_converter_available) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count every single (multi-byte) character
        //

        \preg_match_all('/./us', $str, $chars);

        $char_count = \count($chars[0]);

        // the string is not empty here, so "no characters found" means the match failed
        return $char_count === 0 ? false : $char_count;
    }
9019
9020
    /**
9021
     * Get string length in byte.
9022
     *
9023
     * @param string $str
9024
     *
9025
     * @return int
9026
     */
9027
    public static function strlen_in_byte(string $str): int
    {
        if ($str !== '') {
            // When the mbstring function overload is flagged as active, "mb_strlen()"
            // with the 8-bit "CP850" charset is used instead of plain "strlen()".
            return self::$SUPPORT['mbstring_func_overload'] === true
                ? \mb_strlen($str, 'CP850') // 8-BIT
                : \strlen($str);
        }

        return 0;
    }
9040
9041
    /**
9042
     * Case-insensitive string comparisons using a "natural order" algorithm.
9043
     *
9044
     * INFO: natural order version of UTF8::strcasecmp()
9045
     *
9046
     * @param string $str1     <p>The first string.</p>
9047
     * @param string $str2     <p>The second string.</p>
9048
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9049
     *
9050
     * @return int
9051
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9052
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9053
     *             <strong>0</strong> if they are equal
9054
     */
9055
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands, then delegate to the case-sensitive natural-order comparison
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
9062
9063
    /**
9064
     * String comparisons using a "natural order" algorithm
9065
     *
9066
     * INFO: natural order version of UTF8::strcmp()
9067
     *
9068
     * @see http://php.net/manual/en/function.strnatcmp.php
9069
     *
9070
     * @param string $str1 <p>The first string.</p>
9071
     * @param string $str2 <p>The second string.</p>
9072
     *
9073
     * @return int
9074
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9075
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9076
     *             <strong>0</strong> if they are equal
9077
     */
9078
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // fold both operands via "strtonatfold()" before handing them to the native comparison
            $left = (string) self::strtonatfold($str1);
            $right = (string) self::strtonatfold($str2);

            return \strnatcmp($left, $right);
        }

        // identical input: nothing to fold or compare
        return 0;
    }
9089
9090
    /**
9091
     * Case-insensitive string comparison of the first n characters.
9092
     *
9093
     * @see http://php.net/manual/en/function.strncasecmp.php
9094
     *
9095
     * @param string $str1     <p>The first string.</p>
9096
     * @param string $str2     <p>The second string.</p>
9097
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9098
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9099
     *
9100
     * @return int
9101
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9102
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9103
     *             <strong>0</strong> if they are equal
9104
     */
9105
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both operands, then compare their first $len characters case-sensitively
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_first, $folded_second, $len);
    }
9117
9118
    /**
9119
     * String comparison of the first n characters.
9120
     *
9121
     * @see http://php.net/manual/en/function.strncmp.php
9122
     *
9123
     * @param string $str1     <p>The first string.</p>
9124
     * @param string $str2     <p>The second string.</p>
9125
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9126
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9127
     *
9128
     * @return int
9129
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9130
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9131
     *             <strong>0</strong> if they are equal
9132
     */
9133
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut both operands down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head_first = (string) self::substr($str1, 0, $len, $encoding);
            $head_second = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head_first = (string) \mb_substr($str1, 0, $len);
            $head_second = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare only those prefixes
        return self::strcmp($head_first, $head_second);
    }
9153
9154
    /**
9155
     * Search a string for any of a set of characters.
9156
     *
9157
     * @see http://php.net/manual/en/function.strpbrk.php
9158
     *
9159
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9160
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9161
     *
9162
     * @return false|string string starting from the character found, or false if it is not found
9163
     */
9164
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build a pattern from the char list and look for the first matching character
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // return everything from the first (byte) occurrence of the matched character onwards
        $byte_pos = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $byte_pos);
    }
9176
9177
    /**
9178
     * Find the position of the first occurrence of a substring in a string.
9179
     *
9180
     * @see http://php.net/manual/en/function.mb-strpos.php
9181
     *
9182
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9183
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9184
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9185
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9186
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9187
     *
9188
     * @return false|int
9189
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9190
     *                   string.<br> If needle is not found it returns false.
9191
     */
9192
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        // warn (but keep going) when no extension can interpret the requested charset
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Resolve it to an
            // absolute character index first, so that the position returned below is
            // relative to the whole haystack and not to the remaining tail
            // (an offset beyond the start of the string is clamped to 0).
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        // only search in the part of the haystack that starts at $offset
        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the remaining haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset + 0;
    }
9324
9325
    /**
9326
     * Find the position of the first occurrence of a substring in a string.
9327
     *
9328
     * @param string $haystack <p>
9329
     *                         The string being checked.
9330
     *                         </p>
9331
     * @param string $needle   <p>
9332
     *                         The string to find in haystack.
9333
     *                         </p>
9334
     * @param int    $offset   [optional] <p>
9335
     *                         The search offset. If it is not specified, 0 is used.
9336
     *                         </p>
9337
     *
9338
     * @return false|int The numeric position of the first occurrence of needle in the
9339
     *                   haystack string. If needle is not found, it returns false.
9340
     */
9341
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack !== '' && $needle !== '') {
            // When the mbstring function overload is flagged as active, "mb_strpos()"
            // with the 8-bit "CP850" charset is used instead of plain "strpos()".
            return self::$SUPPORT['mbstring_func_overload'] === true
                ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
                : \strpos($haystack, $needle, $offset);
        }

        // nothing to search in / for
        return false;
    }
9354
9355
    /**
9356
     * Find the last occurrence of a character in a string within another.
9357
     *
9358
     * @see http://php.net/manual/en/function.mb-strrchr.php
9359
     *
9360
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
9361
     * @param string $needle        <p>The string to find in haystack</p>
9362
     * @param bool   $before_needle [optional] <p>
9363
     *                              Determines which portion of haystack
9364
     *                              this function returns.
9365
     *                              If set to true, it returns all of haystack
9366
     *                              from the beginning to the last occurrence of needle.
9367
     *                              If set to false, it returns all of haystack
9368
     *                              from the last occurrence of needle to the end,
9369
     *                              </p>
9370
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9371
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
9372
     *
9373
     * @return false|string the portion of haystack or false if needle is not found
9374
     */
9375
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // preferred path: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // binary / ascii-only charsets: the native function is sufficient,
        // but it can only return the part starting at the needle
        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $is_single_byte) {
            return \strrchr($haystack, $needle);
        }

        // warn (but keep going) when the requested charset cannot be handled without mbstring
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // both remaining fallbacks (iconv / vanilla php) search for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        // locate the last occurrence: via iconv if available, otherwise via our own strrpos()
        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }
        if ($last_pos === false) {
            return false;
        }

        // cut out the requested side of the haystack
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9477
9478
    /**
9479
     * Reverses characters order in the string.
9480
     *
9481
     * @param string $str      <p>The input string.</p>
9482
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9483
     *
9484
     * @return string the string with characters in the reverse sequence
9485
     */
9486
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // emojis are encoded before the reversal and decoded again on the result
        $str = self::emoji_encode($str, true);

        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk from the last character to the first one
            for ($i = ((int) self::strlen($str, $encoding)) - 1; $i >= 0; --$i) {
                $piece = self::substr($str, $i, 1, $encoding);
                if ($piece !== false) {
                    $reversed .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = ((int) \grapheme_strlen($str)) - 1; $i >= 0; --$i) {
                $piece = \grapheme_substr($str, $i, 1);
                if ($piece !== false) {
                    $reversed .= $piece;
                }
            }
        } else {
            for ($i = ((int) \mb_strlen($str)) - 1; $i >= 0; --$i) {
                $piece = \mb_substr($str, $i, 1);
                if ($piece !== false) {
                    $reversed .= $piece;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
9530
9531
    /**
9532
     * Find the last occurrence of a character in a string within another, case-insensitive.
9533
     *
9534
     * @see http://php.net/manual/en/function.mb-strrichr.php
9535
     *
9536
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
9537
     * @param string $needle        <p>The string to find in haystack.</p>
9538
     * @param bool   $before_needle [optional] <p>
9539
     *                              Determines which portion of haystack
9540
     *                              this function returns.
9541
     *                              If set to true, it returns all of haystack
9542
     *                              from the beginning to the last occurrence of needle.
9543
     *                              If set to false, it returns all of haystack
9544
     *                              from the last occurrence of needle to the end,
9545
     *                              </p>
9546
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9547
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
9548
     *
9549
     * @return false|string the portion of haystack or<br>false if needle is not found
9550
     */
9551
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // preferred path: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // vanilla php fallback: search for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        // case-insensitive position of its last occurrence
        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // cut out the requested side of the haystack
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9606
9607
    /**
9608
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
9609
     *
9610
     * @param string     $haystack   <p>The string to look in.</p>
9611
     * @param int|string $needle     <p>The string to look for.</p>
9612
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
9613
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9614
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9615
     *
9616
     * @return false|int
9617
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
9618
     *                   string.<br>If needle is not found, it returns false.</p>
9619
     */
9620
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle:
        // a non-negative integer is passed through "chr()" first
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // preferred path: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        // binary / ascii-only charsets: the native function is sufficient
        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte) {
            return \strripos($haystack, $needle, $offset);
        }

        // warn (but keep going) when the requested charset cannot be handled without mbstring
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // intl: "grapheme_strripos()" can't handle other encodings or a negative offset
        if (
            $encoding === 'UTF-8'
            &&
            $offset >= 0
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        // pure ascii input: the native function is sufficient
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        // vanilla php fallback: case-fold both strings, then run the case-sensitive search
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
9717
9718
    /**
9719
     * Finds position of last occurrence of a string within another, case-insensitive.
9720
     *
9721
     * @param string $haystack <p>
9722
     *                         The string from which to get the position of the last occurrence
9723
     *                         of needle.
9724
     *                         </p>
9725
     * @param string $needle   <p>
9726
     *                         The string to find in haystack.
9727
     *                         </p>
9728
     * @param int    $offset   [optional] <p>
9729
     *                         The position in haystack
9730
     *                         to start searching.
9731
     *                         </p>
9732
     *
9733
     * @return false|int
9734
     *                   <p>Return the numeric position of the last occurrence of needle in the
9735
     *                   haystack string, or false if needle is not found.</p>
9736
     */
9737
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack !== '' && $needle !== '') {
            // When the mbstring function overload is flagged as active, "mb_strripos()"
            // with the 8-bit "CP850" charset is used instead of plain "strripos()".
            return self::$SUPPORT['mbstring_func_overload'] === true
                ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
                : \strripos($haystack, $needle, $offset);
        }

        // nothing to search in / for
        return false;
    }
9750
9751
    /**
9752
     * Find the position of the last occurrence of a substring in a string.
9753
     *
9754
     * @see http://php.net/manual/en/function.mb-strrpos.php
9755
     *
9756
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
9757
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9758
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
9759
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
9760
     *                               the end of the string.
9761
     *                               </p>
9762
     * @param string     $encoding   [optional] <p>Set the charset.</p>
9763
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9764
     *
9765
     * @return false|int
9766
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
9767
     *                   string.<br>If needle is not found, it returns false.</p>
9768
     */
9769
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty haystack cannot contain anything
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle:
        // a non-negative integer is converted into a string via self::chr() first
        if (\is_int($needle) && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strrpos && iconv_strrpos are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // 2) binary || ascii only: the native byte function is sufficient
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        // warn about encodings the remaining fallbacks cannot handle (processing continues anyway)
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        // INFO: "grapheme_strrpos()" can't handle a negative offset or other encodings
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // 5) vanilla php
        //

        // reduce the haystack to the part that is selected by the offset
        if ($offset !== 0) {
            if ($offset > 0) {
                $window = self::substr($haystack, $offset);
            } else {
                $window = self::substr($haystack, 0, $offset);
                // the window starts at the beginning, so nothing has to be added to the result
                $offset = 0;
            }

            // a failed substr() (false) is handled like an empty haystack
            $haystack = (string) $window;
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        // the position is the length of everything in front of the match, plus the skipped offset
        return $offset + (int) self::strlen($prefix);
    }
9889
9890
    /**
     * Find the byte position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of bytes into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or if haystack or needle
     *                   is empty), it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '') {
            return false;
        }

        if ($needle === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        $use_mb_function = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_function
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9922
9923
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start of the part of the string that is examined.</p>
     * @param int    $length   [optional] <p>Length of the part of the string that is examined.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   <p>The length of the matching segment, 0 if nothing matches
     *                   or if the examined string or the mask is empty.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut the string down to the requested part, if any part was requested
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $part = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $part = \mb_substr($str, $offset);
            } else {
                $part = \mb_substr($str, $offset, $length);
            }

            $str = (string) $part;
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // match the leading run of chars that are part of the mask
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        $hit = [];
        if (!\preg_match($pattern, $str, $hit)) {
            return 0;
        }

        return (int) self::strlen($hit[0], $encoding);
    }
9966
9967
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (or if haystack or needle is empty).</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only: the native byte function is sufficient
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        // warn about encodings the remaining fallbacks cannot handle (processing continues anyway)
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5) vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $found);

        if (isset($found[1]) === false) {
            return false;
        }

        $head = $found[1];

        return $before_needle
            ? $head
            : self::substr($haystack, (int) self::strlen($head));
    }
10075
10076
    /**
     * Finds first occurrence of a string within another (byte-wise).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (or if haystack or needle is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($haystack === '') {
            return false;
        }

        if ($needle === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        $use_mb_function = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb_function
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
10115
10116
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid chars, before the case of the string is changed
            $str = self::clean($str);
        }

        // replace the case folding chars first, the rest is a plain case conversion
        $str = self::fixStrCaseHelper($str, $lower, $full);

        $to_lower = $lower === true;

        // no special language + UTF-8: use the "mb_" functions directly
        if ($lang === null && $encoding === 'UTF-8') {
            return $to_lower
                ? \mb_strtolower($str)
                : \mb_strtoupper($str);
        }

        // otherwise use the helper methods, which can handle "$encoding" and "$lang"
        return $to_lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10168
10169
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid chars, before the case of the string is changed
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // no special language + UTF-8: use the "mb_" function directly
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // without a special language there is nothing left to do for "intl"
        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // the "lang" parameter needs "intl": warn and fallback, if it is not available
        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // load the list of available transliterators only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // use the language independent transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
10238
10239
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid chars, before the case of the string is changed
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // no special language + UTF-8: use the "mb_" function directly
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the list of available transliterators only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // "intl" is available here, but it has no transliterator for this language
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // use the language independent transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the "lang" parameter needs "intl": warn and use the fallback below
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10308
10309
    /**
     * Translate characters or replace sub-strings.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.<br>
     *                              If "to" is empty: a string is removed from "str" and an array is used as
     *                              "search => replacement" pairs.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to to.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replacement pairs
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from" to the
     *                corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical "from" and "to": the string is returned unchanged
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $from_chars = self::str_split($from);
            $to_chars = self::str_split($to);
            $count_from = \count($from_chars);
            $count_to = \count($to_chars);

            // only chars which have a counterpart can be translated: cut the longer list
            if ($count_from > $count_to) {
                $from_chars = \array_slice($from_chars, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_chars = \array_slice($to_chars, 0, $count_from);
            }

            // keep the result in its own variable, so that the input is still available for the error message
            $pairs = \array_combine($from_chars, $to_chars);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_chars, true) . ' | to: ' . \print_r($to_chars, true) . ')');
            }

            $from = $pairs;
        }

        // empty "to" + string "from": remove the sub-string
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        // "from" is an array of "search => replacement" pairs
        return \strtr($str, $from);
    }
10357
10358
    /**
     * Return the width of a string.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     *             <p>The width of the string, 0 for an empty string.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // 2) vanilla php
        //

        // the regex below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove the chars from the listed ranges and count them: each of them has a width of 2
        $wide_count = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // every remaining char has a width of 1
        return (2 * $wide_count) + (int) self::strlen($remaining, 'UTF-8');
    }
10411
10412
    /**
10413
     * Get part of a string.
10414
     *
10415
     * @see http://php.net/manual/en/function.mb-substr.php
10416
     *
10417
     * @param string $str        <p>The string being checked.</p>
10418
     * @param int    $offset     <p>The first position used in str.</p>
10419
     * @param int    $length     [optional] <p>The maximum length of the returned string.</p>
10420
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10421
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10422
     *
10423
     * @return false|string
10424
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10425
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10426
     *                      characters long, <b>FALSE</b> will be returned.
10427
     */
10428
    public static function substr(
10429
        string $str,
10430
        int $offset = 0,
10431
        int $length = null,
10432
        string $encoding = 'UTF-8',
10433
        bool $clean_utf8 = false
10434
    ) {
10435
        // empty string
10436 172
        if ($str === '' || $length === 0) {
10437 8
            return '';
10438
        }
10439
10440 168
        if ($clean_utf8 === true) {
10441
            // iconv and mbstring are not tolerant to invalid encoding
10442
            // further, their behaviour is inconsistent with that of PHP's substr
10443 2
            $str = self::clean($str);
10444
        }
10445
10446
        // whole string
10447 168
        if (!$offset && $length === null) {
10448 7
            return $str;
10449
        }
10450
10451 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10452 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10453
        }
10454
10455
        //
10456
        // fallback via mbstring
10457
        //
10458
10459 163
        if (self::$SUPPORT['mbstring'] === true) {
10460 161
            if ($encoding === 'UTF-8') {
10461 161
                if ($length === null) {
10462 64
                    return \mb_substr($str, $offset);
10463
                }
10464
10465 102
                return \mb_substr($str, $offset, $length);
10466
            }
10467
10468
            return self::substr($str, $offset, $length, $encoding);
10469
        }
10470
10471
        //
10472
        // fallback for binary || ascii only
10473
        //
10474
10475
        if (
10476 4
            $encoding === 'CP850'
10477
            ||
10478 4
            $encoding === 'ASCII'
10479
        ) {
10480
            if ($length === null) {
10481
                return \substr($str, $offset);
10482
            }
10483
10484
            return \substr($str, $offset, $length);
10485
        }
10486
10487
        // otherwise we need the string-length
10488 4
        $str_length = 0;
10489 4
        if ($offset || $length === null) {
10490 4
            $str_length = self::strlen($str, $encoding);
10491
        }
10492
10493
        // e.g.: invalid chars + mbstring not installed
10494 4
        if ($str_length === false) {
10495
            return false;
10496
        }
10497
10498
        // empty string
10499 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10500
            return '';
10501
        }
10502
10503
        // impossible
10504 4
        if ($offset && $offset > $str_length) {
10505
            return '';
10506
        }
10507
10508 4
        if ($length === null) {
10509 4
            $length = (int) $str_length;
10510
        } else {
10511 2
            $length = (int) $length;
10512
        }
10513
10514
        if (
10515 4
            $encoding !== 'UTF-8'
10516
            &&
10517 4
            self::$SUPPORT['mbstring'] === false
10518
        ) {
10519 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10520
        }
10521
10522
        //
10523
        // fallback via intl
10524
        //
10525
10526
        if (
10527 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10528
            &&
10529 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10530
            &&
10531 4
            self::$SUPPORT['intl'] === true
10532
        ) {
10533
            $return_tmp = \grapheme_substr($str, $offset, $length);
10534
            if ($return_tmp !== false) {
10535
                return $return_tmp;
10536
            }
10537
        }
10538
10539
        //
10540
        // fallback via iconv
10541
        //
10542
10543
        if (
10544 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10545
            &&
10546 4
            self::$SUPPORT['iconv'] === true
10547
        ) {
10548
            $return_tmp = \iconv_substr($str, $offset, $length);
10549
            if ($return_tmp !== false) {
10550
                return $return_tmp;
10551
            }
10552
        }
10553
10554
        //
10555
        // fallback for ascii only
10556
        //
10557
10558 4
        if (ASCII::is_ascii($str)) {
10559
            return \substr($str, $offset, $length);
10560
        }
10561
10562
        //
10563
        // fallback via vanilla php
10564
        //
10565
10566
        // split to array, and remove invalid characters
10567 4
        $array = self::str_split($str);
10568
10569
        // extract relevant part, and join to make sting again
10570 4
        return \implode('', \array_slice($array, $offset, $length));
10571
    }
10572
10573
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * <p>If an offset or a length is given, $str1 is first reduced to the requested slice and
     * $str2 is cut down to the same number of characters; the two results are then compared.</p>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // The slice length must be counted in the same encoding that is used to cut $str2,
                // otherwise the two operands can end up with a different number of characters.
                $str1_length = (int) self::strlen($str1, $encoding);
                $str2 = (string) self::substr($str2, 0, $str1_length, $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10626
10627
    /**
     * Count the number of substring occurrences.
     *
     * <p>The haystack is first reduced to the window described by $offset / $length
     * (in characters), then the occurrences of $needle inside that window are counted.</p>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative value is handed to "mb_substr()" and therefore
     *                             stops that many characters before the end of the haystack; 0 always
     *                             yields a count of 0.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int the number of occurrences, or false if $haystack or $needle is empty
     *                   (or if the haystack length could not be determined)
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // A zero length already returned above, so every non-null length (positive or negative)
        // restricts the haystack - also when no offset was requested.
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // no mbstring: count the literal needle with a unicode-aware regex
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10708
10709
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * <p>Without "mbstring.func_overload" the native "substr_count()" is used directly. With the
     * overload active, the haystack is sliced first and then counted via "mb_substr_count()"
     * in the 8-bit "CP850" encoding.</p>
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The offset where to start counting.</p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int the number of times the needle substring occurs in the haystack string
     *                   (0 if $haystack or $needle is empty)
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // regular setup: the native function already counts bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                $measured = self::strlen($haystack);
                if ($measured === false) {
                    return false;
                }
                $length = (int) $measured;
            }

            $window_is_empty = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
            if (
                $window_is_empty
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            /** @var false|string $slice - needed for PhpStan (stubs error) */
            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10787
10788
    /**
     * Counts how often $substring occurs in $str.
     *
     * <p>The comparison is case-sensitive unless $case_sensitive is set to false. For the
     * case-insensitive comparison both strings are upper-cased (UTF-8) or case-folded
     * (any other encoding) before counting.</p>
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int the number of occurrences (0 if $str or $substring is empty)
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // decided before normalizing, so an alias of UTF-8 still takes the explicit-encoding path
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!$case_sensitive) {
            if ($is_utf8) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            } else {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }
        }

        if ($is_utf8) {
            return (int) \mb_substr_count($str, $substring);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10833
10834
    /**
     * Strips $needle from the start of $haystack, ignoring case while matching.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not
     *                start with $needle (or if $needle is empty)
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10858
10859
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string;
     *                         null means "up to the end of the string".</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // Omit the length argument instead of passing a magic upper bound,
        // so the result is never capped at an arbitrary byte count.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10890
10891
    /**
     * Strips $needle from the end of $haystack, ignoring case while matching.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack if it does not
     *                end with $needle (or if $needle is empty)
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // number of leading characters that survive once the suffix is cut off
        $keep = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep);
    }
10915
10916
    /**
     * Strips $needle from the start of $haystack (case-sensitive match).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not
     *                start with $needle (or if $needle is empty)
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10940
10941
    /**
     * Replace text within a portion of a string.
     *
     * <p>If $str is an array, the replacement is applied to every element; $replacement, $offset
     * and $length may then be arrays as well (matched by position) or scalars (used for every
     * element). The given $encoding is used for every element.</p>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.<br>
     *                                     NOTE: if $str is an array and $length is not given, a length of 0
     *                                     (= insert) is used for every element.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // anything that is not a real integer falls back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // anything that is not a real integer falls back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, one element at a time; the closure forwards $encoding,
            // which a plain callable passed to "array_map()" could not do
            return \array_map(
                static function ($str_single, $replacement_single, $offset_single, $length_single) use ($encoding) {
                    return self::substr_replace(
                        $str_single,
                        $replacement_single,
                        $offset_single,
                        $length_single,
                        $encoding
                    );
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        if (\is_array($replacement) === true) {
            // a single input string can only take a single replacement: use the first one
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, string length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing / oversized length into a plain character count
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
11091
11092
    /**
     * Strips $needle from the end of $haystack (case-sensitive match).
     *
     * <p>The suffix check is done on the raw bytes; only the final cut is done in characters
     * of the given encoding.</p>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack if it does not
     *                end with $needle (or if $needle is empty)
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        $has_suffix = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$has_suffix) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
11133
11134
    /**
     * Returns a case swapped version of the string.
     *
     * <p>The swap is done by XOR-ing the lower-cased, the upper-cased and the original string.
     * PHP applies "^" to strings byte by byte, so the result is only meaningful where the
     * lower- and upper-case variants occupy the same bytes positions as the original.</p>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // lower ^ upper leaves only the bits that differ between the cases;
        // XOR-ing that onto the original flips exactly those bits
        return (string) ($lower ^ $upper ^ $str);
    }
11161
11162
    /**
     * Checks whether symfony-polyfills are used.
     *
     * <p>A polyfill is assumed when a function of the "mbstring" or "iconv" extension exists
     * although the extension itself is not loaded.</p>
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconv_is_polyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
11185
11186
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @return string the string with all tabs replaced
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
11204
11205
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * <p>Without a language and without the "keep the string length" option the conversion is
     * done by "mb_convert_case()"; otherwise it is delegated to "UTF8::str_titleize()".</p>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            // drop invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the extended implementation
        if ($lang !== null || $try_to_keep_the_string_length !== false) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
11255
11256
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::to_ascii()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Passed on as the second argument of "UTF8::to_ascii()".</p>
     * @param bool   $strict    [optional] <p>Passed on as the third argument of "UTF8::to_ascii()".</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11275
11276
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11290
11291
    /**
     * Deprecated camelCase alias; it forwards straight to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        // Pure delegation: no work of its own.
        $converted = self::to_iso8859($str);

        return $converted;
    }
11305
11306
    /**
     * Deprecated camelCase alias that forwards to "UTF8::to_utf8()".
     *
     * Only the first argument is passed on, so "to_utf8()" runs with its
     * default for the html-entity flag.
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11320
11321
    /**
     * Convert a string into ASCII.
     *
     * The work is done by "ASCII::to_transliterate()"; all arguments are
     * handed over unchanged.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
11338
11339
    /**
     * Convert a loosely typed value into a boolean.
     *
     * The value is cast to string and trimmed, then evaluated in this order:
     * <ul>
     * <li>empty string => false</li>
     * <li>"true", "1", "on", "yes" (any letter case) => true</li>
     * <li>"false", "0", "off", "no" (any letter case) => false</li>
     * <li>any other numeric string => true only if its float value is greater than zero</li>
     * <li>everything else => true</li>
     * </ul>
     *
     * @param mixed $str <p>The value to interpret; it is cast to string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // Trim once, up front: surrounding whitespace must neither hide a keyword
        // (" false ") nor decide whether a value such as "-1 " counts as numeric.
        $str = \trim((string) $str);

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // The keywords are plain ASCII, so a byte-wise lowercase is sufficient.
        $key = \strtolower($str);
        if (isset($map[$key])) {
            return $map[$key];
        }

        if (\is_numeric($str)) {
            // Zero and negative numbers count as false.
            return (float) $str > 0;
        }

        // What is left is non-empty, is not "0" and is not numeric: such a string is always truthy.
        return true;
    }
11380
11381
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Thin wrapper: the conversion itself is done by "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11402
11403
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept);
     * any other value is cast to string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $string = (string) $str;

        // nothing to decode in an empty string
        return $string === '' ? '' : self::utf8_decode($string);
    }
11427
11428
    /**
     * Deprecated alias that forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        // Pure delegation: no work of its own.
        $converted = self::to_iso8859($str);

        return $converted;
    }
11442
11443
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * How it works:
     * <ol>
     * <li>Arrays are processed element by element (recursively, keys are kept).</li>
     * <li>The string is scanned byte by byte: 7-bit bytes are copied, byte runs that look like a complete 2-, 3-
     * or 4-byte UTF-8 sequence are copied, every other byte goes through "to_utf8_convert_helper()".</li>
     * <li>"\uXXXX" escape sequences (including surrogate pairs) are replaced by the character they encode.</li>
     * <li>Optionally html-entities are decoded via "UTF8::html_entity_decode()".</li>
     * </ol>
     *
     * @param string|string[] $str                        <p>Any string or array.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decode_html_entity_to_utf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $converted = '';
        $pos = 0;

        while ($pos < $length) {
            $lead = $str[$pos];
            $lead_byte = \ord($lead);

            if ($lead_byte < 0x80) {
                // 7-bit byte: it doesn't need conversion
                $converted .= $lead;
                ++$pos;

                continue;
            }

            // Length of the sequence this byte would start
            // (0 === the byte cannot start a sequence and must be converted).
            if ($lead_byte < 0xC0) {
                $sequence_length = 0; // continuation byte without a lead byte
            } elseif ($lead_byte <= 0xDF) {
                $sequence_length = 2;
            } elseif ($lead_byte <= 0xEF) {
                $sequence_length = 3;
            } elseif ($lead_byte <= 0xF7) {
                $sequence_length = 4;
            } else {
                $sequence_length = 0; // 0xF8 - 0xFF: doesn't look like UTF8
            }

            // All following bytes of the sequence must exist and must be in the range 0x80 - 0xBF.
            $looks_like_utf8 = $sequence_length > 0 && $pos + $sequence_length <= $length;
            for ($offset = 1; $looks_like_utf8 && $offset < $sequence_length; ++$offset) {
                $continuation = \ord($str[$pos + $offset]);
                $looks_like_utf8 = $continuation >= 0x80 && $continuation <= 0xBF;
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already
                $converted .= \substr($str, $pos, $sequence_length);
                $pos += $sequence_length;
            } else {
                // not valid UTF8 - convert only this one byte and re-examine the next one
                $converted .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $decoded = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);

                    $code_point = ($high_surrogate << 10)
                                  + $low_surrogate
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // 0x80 - 0x9F: the two UTF-8 bytes are built by hand
                    $first_byte = (string) self::chr(0xC0 | ($code_point >> 6));
                    $second_byte = (string) self::chr(0x80 | ($code_point & 0x3F));

                    return $first_byte . $second_byte;
                }

                return self::decimal_to_chr($code_point);
            },
            $converted
        );

        // "preg_replace_callback()" returns null on error
        if ($decoded === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8 === true) {
            $decoded = self::html_entity_decode($decoded);
        }

        return $decoded;
    }
11576
11577
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * With mbstring support the replacement is done by "mb_ereg_replace()",
     * otherwise by "UTF8::regex_replace()".
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           means "strip whitespace".</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly: a truthiness check would treat the char-list "0" like "nothing given".
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_chars) {
            // this pattern is used with "/" as delimiter, so the delimiter must be quoted as well
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11618
11619
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest; only the first
     * character is converted. The plain "mb_strtoupper()" is used as long as
     * neither $lang nor $try_to_keep_the_string_length is requested, otherwise
     * "UTF8::strtoupper()" does the conversion.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        $native_upper_is_enough = $lang === null && $try_to_keep_the_string_length === false;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            if ($native_upper_is_enough === true) {
                return \mb_strtoupper($first_char) . $remainder;
            }

            return self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($native_upper_is_enough === true) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first_char, $encoding) . $remainder;
        }

        $first_char = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $remainder;
    }
11688
11689
    /**
     * Deprecated alias that forwards to "UTF8::ucfirst()".
     *
     * Only the first three arguments of "ucfirst()" can be set through this alias.
     *
     * @param string $str
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
11708
11709
    /**
     * Uppercase for all words in the string.
     *
     * Pure ASCII input without $char_list / $exceptions is handed to PHP's own
     * "ucwords()". Everything else is split via "UTF8::str_to_words()" and each
     * non-empty part that is not listed in $exceptions goes through
     * "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Explicit comparison: the string "0" is falsy in PHP but it is not empty.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        $use_php_default_functions = !(bool) ($char_list . \implode('', $exceptions));

        if (
            $use_php_default_functions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // Skip empty parts only; a part like "0" must stay in the result.
            if ($word === '') {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
11773
11774
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * A string without any of the markers "&", "%", "+" or "\u" only goes through
     * "UTF8::fix_simple_utf8()". Otherwise one decode pass is:
     * to_utf8() -> html_entity_decode() -> urldecode() -> fix_simple_utf8().
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Is there anything in the string that could be decoded at all?
        $needs_decoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needs_decoding = true;

                break;
            }
        }

        if ($needs_decoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // At least one pass; with $multi_decode the passes are repeated
        // until a pass does not change the string anymore.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($entities_decoded));
        } while ($multi_decode === true && $before_pass !== $str);

        return $str;
    }
11845
11846
    /**
     * Return a array with "urlencoded"-win1252 -> UTF-8
     *
     * The keys are the percent-encoded bytes "%20" - "%FF" (upper-case hex), the
     * values are the UTF-8 characters these bytes stand for in WINDOWS-1252.
     *
     * Invisible characters (DEL, no-break space, soft hyphen) are written as byte
     * escapes so that they are visible in the source and survive editors.
     * The bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D are not assigned in WINDOWS-1252
     * and are mapped to an empty string.
     *
     * @return string[]
     *
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => "\x7F", // DEL (control character)
            '%80' => '€', // 0x80 is the euro sign in WINDOWS-1252
            '%81' => '', // not assigned in WINDOWS-1252
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '', // not assigned in WINDOWS-1252
            '%8E' => 'Ž',
            '%8F' => '', // not assigned in WINDOWS-1252
            '%90' => '', // not assigned in WINDOWS-1252
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '', // not assigned in WINDOWS-1252
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => "\xC2\xA0", // no-break space (U+00A0)
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => "\xC2\xAD", // soft hyphen (U+00AD)
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
12082
12083
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place, byte by byte (the write position never
     * overtakes the read position):
     * <ul>
     * <li>2-byte sequences become one byte; code points above 255 become "?"</li>
     * <li>3- and 4-byte sequences become "?"</li>
     * <li>a 2-byte lead byte at the very end of the string (truncated sequence) becomes "?"</li>
     * <li>every other byte is copied unchanged</li>
     * </ul>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>If true, the original string is returned whenever "UTF8::strlen()" of the
     *                                decoded result is not smaller than "UTF8::strlen()" of the input.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparision
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the byte <-> number lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        // $i === read position, $j === write position
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the upper four bits of the byte decide what kind of byte it is
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // Truncated 2-byte sequence: there is no second byte to read,
                        // so do not touch $str[$i + 1] (it does not exist).
                        $str[$j] = $no_char_found;

                        break;
                    }

                    // 5 payload bits from the lead byte + 6 payload bits from the following byte
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than for a 3-byte sequence
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: not representable in ISO-8859-1
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars === true
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12153
12154
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Wrapper around the global "utf8_encode()"; a "false" result (the polyfill
     * maybe return false) is turned into an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12178
12179
    /**
     * Backward-compatible alias of "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>Whatever "UTF8::fix_simple_utf8()" returns for the given string.</p>
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12192
12193
    /**
     * Returns the table of UTF-8 whitespace characters kept by this class.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  <p>The whitespace characters as values, keyed by the type of whitespace
     *                  as defined in the URL above.</p>
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12206
12207
    /**
     * Limit the number of words in a string.
     *
     * A "word" is any run of non-whitespace characters. If the string contains more than $limit words,
     * it is cut after the last allowed word and $str_add_on is appended; otherwise it is returned unchanged.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     *                <p>An empty string if the input is empty or $limit is lower than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // optional leading whitespace, then up to $limit groups of "word + trailing whitespace"
        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        if (
            !isset($matches[0])
            ||
            \mb_strlen($str) === (int) \mb_strlen($matches[0])
        ) {
            // no match at all (e.g. invalid UTF-8) or nothing was cut off
            return $str;
        }

        // With the "u" modifier "\s" also matches Unicode whitespace (e.g. NBSP, EM SPACE), which
        // "rtrim()" would leave in front of the add-on; "\x00" is kept in the class because the
        // default "rtrim()" character list strips it as well.
        $head = \preg_replace('/[\\s\\x00]++$/u', '', $matches[0]);
        if ($head === null) {
            // regex failure: fall back to the byte-wise trim
            $head = \rtrim($matches[0]);
        }

        return $head . $str_add_on;
    }
12237
12238
    /**
     * Multi-byte aware variant of PHP's "wordwrap()".
     *
     * The string is mirrored into a one-byte-per-character mask, the native "wordwrap()" computes the
     * wrap positions on that mask, and the positions are then applied to the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If set to true, the string is always wrapped at or before the specified
     *                      width, so a word longer than the width is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if $str or
     *                $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real characters (an existing break is stored as one element),
        // $mask mirrors it: "#" for a break, " " for a space and "?" for anything else.
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= ($char === ' ') ? ' ' : '?';
            }
        }

        // let the native function decide where the "#" break markers go
        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_pos = 0; // position in $chars
        $mask_pos = -1; // position in $mask
        $marker_pos = -1; // position of the current "#" in $mask
        while (($marker_pos = \mb_strpos($mask, '#', $marker_pos + 1)) !== false) {
            // copy everything in front of the marker
            for (++$mask_pos; $mask_pos < $marker_pos; ++$mask_pos) {
                $wrapped .= $chars[$char_pos];
                unset($chars[$char_pos]);
                ++$char_pos;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // the marker replaces an existing break or a space; a marker inserted by
            // cutting a long word replaces nothing
            if (
                $chars[$char_pos] === $break
                ||
                $chars[$char_pos] === ' '
            ) {
                unset($chars[$char_pos]);
                ++$char_pos;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($marker_pos > $mask_length) {
                break;
            }
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
12324
12325
    /**
     * Splits the string by "$delimiter" (line breaks by default), word-wraps every part
     * on its own via "UTF8::wordwrap()" and joins the parts again.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If set to true, the string is always wrapped at or before the
     *                                     specified width, so a word longer than the width is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, a $break is appended to the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     The string to split by. With null the input is split at
     *                                     "\r\n", "\r" or "\n" and the parts are joined with "\n".
     *                                     </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = ($delimiter === null)
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
12376
12377
    /**
     * Returns the list of Unicode White Space characters kept by this class.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12386
12387
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * Rejected are: stray continuation octets, non-shortest (overlong) forms, UTF-16 surrogates,
     * code points above U+10FFFF, 5- and 6-octet sequences and sequences that are cut off
     * (including at the very end of the string).
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // The PCRE shortcut only works when the "u" modifier is available; the guard used to be
        // inverted ("!== true"), so the shortcut ran exactly when it could not work.
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        $mState = 0; // number of continuation octets still expected for the current sequence
        $mUcs4 = 0; // code point assembled so far
        $mBytes = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // Start of a character: either US-ASCII or the first octet of a multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } else {
                    // Either a stray continuation octet, an octet that can never start a sequence,
                    // or the first octet of a 5/6 octet sequence. The latter is always illegal (it is
                    // either not the shortest form or outside of 0-0x10FFFF), so there is no need to
                    // read the rest of the sequence first.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // Legal continuation octet: merge its 6 payload bits into the code point.
                $mUcs4 |= ($in & 0x3F) << (($mState - 1) * 6);

                if (--$mState === 0) {
                    // End of the multi-octet sequence, $mUcs4 now contains the final code point.
                    if (
                        // From Unicode 3.1, non-shortest form is illegal
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // reset for the next character
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // A continuation octet was expected: incomplete multi-octet sequence.
                return false;
            }
        }

        // A sequence that is still open at the end of the string was cut off.
        return $mState === 0;
    }
12534
12535
    /**
     * Replaces the characters listed in the case-folding tables that are not handled elsewhere:
     * by default lower-case forms are replaced by their upper-case forms, with $use_lowercase
     * it is the other way around.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        $use_lowercase = false,
        $use_full_case_fold = false
    ) {
        // the full table is loaded lazily, once per request
        static $FULL_CASE_FOLD = null;

        $to_lowercase = $use_lowercase === true;

        $upper_chars = self::$COMMON_CASE_FOLD['upper'];
        $lower_chars = self::$COMMON_CASE_FOLD['lower'];

        $str = $to_lowercase
            ? \str_replace($upper_chars, $lower_chars, $str)
            : \str_replace($lower_chars, $upper_chars, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        return $to_lowercase
            ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
            : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12581
12582
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns what the included file returns.
     *
     * @param string $file <p>The name of the data file, without directory and ".php" extension.</p>
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12598
12599
    /**
     * Builds the emoji lookup caches on first use.
     *
     * The emoji table is sorted by key length (longest key first), then the key cache, the value
     * cache and one reversible placeholder per key are filled.
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            // placeholder: the CRC32 of the key followed by the same digits reversed
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12629
12630
    /**
     * Checks whether the mbstring function overloading of the string functions is active on the server.
     *
     * INFO: the INI directive "mbstring.func_overload" is deprecated since PHP 7.2
     *
     * @return bool
     *              <p>false if the constant "MB_OVERLOAD_STRING" does not exist or its bit is not set
     *              in "mbstring.func_overload".</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
12649
12650
    /**
     * Filters a list of strings by length and / or emptiness.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop values that are empty after "trim()".</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop values whose length is lower than or equal to this number,
     *                                      null disables the check.
     *                                      </p>
     *
     * @return array
     *               <p>The remaining values in their original order, re-indexed from 0.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: nothing is modified in place, and a by-reference loop
        // would leave a dangling reference to the last element behind
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12689
12690
    /**
     * Builds a regular expression fragment that matches any of the characters in $s.
     *
     * Characters are collected in one bracket expression (which starts with $class); elements
     * returned by "UTF8::str_split()" that are longer than one character are added as alternatives
     * next to it. Results are cached per argument pair.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the bracket expression, e.g. an already escaped class.</p>
     *
     * @return string
     *                <p>Either a single "[...]" expression or a "(?:...|...)" group.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        static $RX_CLASS_CACHE = [];

        // The length prefix keeps the key unambiguous: plain concatenation would map
        // e.g. ("ab", "c") and ("a", "bc") to the same cache entry.
        $cache_key = \strlen($s) . ':' . $s . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        $class_array = [$class];

        // iterate by value with its own variable: the result of a function call cannot be
        // used as a reference target, and the parameter must not be overwritten
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading hyphen is always a literal inside of a bracket expression
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes: may be a meta character, so it needs quoting
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                // more than one character: cannot be part of a bracket expression
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
12740
12741
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The string is split by $delimiter and the first character of every part is upper-cased, except
     * for parts that are a known particle (e.g. "von", "de") or that start with a known prefix
     * (e.g. "mc", "d'"). If the delimiter is "-", the particles are treated as prefixes as well.
     *
     * NOTE(review): $encoding is only used for the prefix detection, it is not passed to "UTF8::ucfirst()".
     *
     * @param string $names     <p>The name(s) to fix.</p>
     * @param string $delimiter <p>The string that separates the parts of the name.</p>
     * @param string $encoding  [optional] <p>The encoding used for the prefix detection.</p>
     *
     * @return string
     *                <p>The parts joined by $delimiter again.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that keep a part untouched
        $skip_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $skip_beginnings = \array_merge($special_cases['names'], $skip_beginnings);
        }

        // Write back via the key instead of a reference, so that no dangling reference
        // to the last element survives the loop.
        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($skip_beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // one match is enough, go on with the next part
                    continue 2;
                }
            }

            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
12835
12836
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and all non-spacing marks
     * (e.g. combining accents) are removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     *                     <p>An empty string if the input cannot be normalized (e.g. invalid UTF-8),
     *                     null if the regular expression fails.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
        if ($decomposed === false) {
            // "false" must not reach "preg_replace()": with strict types this is a TypeError on PHP 8,
            // older versions converted it to an empty string
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12852
12853
    /**
     * Converts a single input value (looked up in the "ord" table) into a UTF-8 sequence.
     *
     * Values with an entry in the "win1252_to_utf8" table are replaced by that entry; every other
     * value is encoded as a two-byte sequence: "110000xx" (the two upper bits) followed by
     * "10xxxxxx" (the six lower bits).
     *
     * @param int|string $input <p>The value to convert, it must be a key of the "ord" table.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Explicit cast: a float array key is an implicit float-to-int conversion,
            // which is deprecated since PHP 8.1.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // byte-wise string operations, not boolean logic
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12889
12890
    /**
     * Rewrites the non-standard "%uXXXX" URL escapes (3 or 4 hex digits) as hexadecimal
     * HTML entities ("&#xXXXX;"); strings without such an escape are returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        if (\preg_match($unicode_escape_rx, $str) !== 1) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
12906
}
12907