Passed
Push — master ( c83347...4d3fa0 )
by Lars
08:00 queued 04:46
created

UTF8::str_iends()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 2
dl 0
loc 3
ccs 0
cts 1
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
21
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
22
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
23
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
24
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
25
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
26
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
27
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
28
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
29
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
30
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
31
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
241
     * __construct()
242
     */
243 32
    public function __construct()
244
    {
245 32
    }
246
247
    /**
     * Picks one character out of a string by position, similar to $str[1] but
     * character-based instead of byte-based.
     *
     * An empty input string or a negative position yields an empty string.
     * For "UTF-8" the lookup is done with mb_substr(); any other encoding is
     * delegated to self::substr().
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be picked from an empty string or at a negative position.
        if ($pos < 0 || $str === '') {
            return '';
        }

        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
268
269
    /**
     * Makes sure the given string starts with a byte order mark.
     *
     * The string is returned untouched when self::string_has_bom() reports
     * anything other than false; otherwise the value of self::bom() is
     * prepended.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            // already carries a BOM
            return $str;
        }

        return self::bom() . $str;
    }
286
287
    /**
     * Changes the case of all keys in an array.
     *
     * Keys are cast to string and converted with self::strtoupper() when
     * $case is CASE_UPPER; every other value of $case (including invalid
     * ones) is handled as CASE_LOWER via self::strtolower(). Values are copied
     * over unchanged. When two keys collapse to the same converted key, the
     * later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default)</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array an array with its keys lower or uppercased, values untouched
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // Anything that is not explicitly CASE_UPPER falls back to lower-casing.
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the loop never writes to $value, so a by-reference
        // loop would only force an array separation and leave a dangling reference.
        foreach ($array as $key => $value) {
            $key = $toUpper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
     * Extracts the text enclosed by a start and an end delimiter.
     *
     * The search for $start begins at $offset; the search for $end begins
     * directly behind the found $start. An empty string is returned when
     * either delimiter is missing or when nothing lies between them.
     *
     * For "UTF-8" the native mb_* functions are used directly; every other
     * encoding is normalized first and handled by the class' own
     * strpos()/strlen()/substr() wrappers.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $startPos = self::strpos($str, $start, $offset, $encoding);
            if ($startPos === false) {
                return '';
            }

            // first character behind the start delimiter
            $from = $startPos + (int) self::strlen($start, $encoding);
            $endPos = self::strpos($str, $end, $from, $encoding);
            if ($endPos === false || $endPos === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $endPos - $from, $encoding);
        }

        // UTF-8 fast path: call the mb_* functions without the wrappers.
        $startPos = \mb_strpos($str, $start, $offset);
        if ($startPos === false) {
            return '';
        }

        $from = $startPos + (int) \mb_strlen($start);
        $endPos = \mb_strpos($str, $end, $from);
        if ($endPos === false || $endPos === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $endPos - $from);
    }
382
383
    /**
     * Converts a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are read as one big-endian number: leading zeros are ignored
     * and the remaining bits are left-padded to whole bytes, so "1010" yields
     * the single byte "\x0a". The conversion works byte by byte and is
     * therefore exact for inputs of any length (a float-based base conversion
     * would lose precision on long inputs).
     *
     * Characters other than "0" and "1" are ignored. A non-string value, an
     * empty string, or a string without any "1" yields an empty string.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // Keep binary digits only and drop leading zeros (numeric semantics).
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to a multiple of 8 so the low-order bits stay aligned to bytes.
        $missing = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $octet) {
            $str .= \chr((int) \bindec($octet));
        }

        return $str;
    }
403
404
    /**
     * Returns the three bytes of the UTF-8 byte order mark (0xEF 0xBB 0xBF).
     *
     * INFO: self::$BOM lists the UTF-16 and UTF-32 byte order marks as well.
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
415
416
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged and returns
     * its result.
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
430
431
    /**
     * Returns the character located at $index (indexes start at 0).
     *
     * "UTF-8" input is handled by mb_substr() directly, every other encoding
     * by self::substr(). No range check is done here, so the result for an
     * out-of-range or negative index is whatever the underlying substr call
     * produces.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
448
449
    /**
     * Splits a string into its characters by delegating to self::str_split()
     * with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
460
461
    /**
     * Probes the server environment once and records the findings in
     * self::$SUPPORT.
     *
     * On the first call the "already_checked_via_portable_utf8" flag is set,
     * the individual capability probes are stored, and the mbstring internal
     * encoding is switched to UTF-8 when mbstring (or the symfony polyfill) is
     * available. Later calls do nothing.
     *
     * @return true|null true when the probe ran, null when it had already been done
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // The polyfill provides mb_* functions too, so it gets the same setup.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
     * Generates a character from the given code point, UTF-8 encoded unless
     * another target encoding is requested.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Digit-only strings such as "228" are treated as the code point they
     * spell out, on every code path. Integer code points below 0 or above
     * 0x10FFFF cannot be encoded and yield null. Results are memoized per
     * code point / encoding pair for the lifetime of the request.
     *
     * Lookup order: the self::$CHR table for code points up to 127, then
     * IntlChar::chr() when available, then a manual bit-shifting fallback.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // IntlChar::chr() reads a string argument as a UTF-8 character, not as
        // a number, so numeric strings must be converted before they get there.
        if (\is_string($code_point) && \preg_match('/\A\d+\z/', $code_point) === 1) {
            $code_point = (int) $code_point;
        }

        // Outside the Unicode range there is nothing to encode.
        if (\is_int($code_point) && ($code_point < 0 || $code_point > 0x10FFFF)) {
            return null;
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = (array) self::getData('chr');
        }

        $code_point = (int) $code_point;
        if ($code_point <= 0x7F) {
            // 1 byte: 0xxxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
629
630
    /**
     * Runs a callback on every character of a string.
     *
     * The string is split with self::str_split() and the pieces are passed
     * through array_map().
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
645
646
    /**
     * Lists the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * When mbstring function overloading is flagged in self::$SUPPORT, the
     * length is measured with mb_strlen() in the 8-bit "CP850" encoding
     * instead of strlen().
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byteLength = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byteLength = '\strlen';
        }

        return \array_map($byteLength, self::str_split($str));
    }
676
677
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes belong to the sequence (1 to 4);
     * its marker bits are stripped and the payload bits of the following
     * bytes are appended, 6 bits each. A first byte that matches none of the
     * known lead-byte patterns is returned as read.
     *
     * NOTE(review): relies on self::ord() returning the integer value of a
     * single byte — confirm against its implementation.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $code = self::ord($char[0]);

        // 0xxxxxxx: single-byte character, nothing to decode
        if (($code & 0x80) === 0) {
            return $code;
        }

        // sequence length => [mask selecting the marker bits, expected marker]
        $leadBytes = [
            2 => [0xe0, 0xc0], // 110xxxxx
            3 => [0xf0, 0xe0], // 1110xxxx
            4 => [0xf8, 0xf0], // 11110xxx
        ];

        $bytes = 1;
        foreach ($leadBytes as $length => $pattern) {
            list($mask, $marker) = $pattern;
            if (($code & $mask) === $marker) {
                $bytes = $length;
                $code &= ~$marker;

                break;
            }
        }

        // 10xxxxxx: append the payload of every continuation byte
        for ($offset = 1; $offset < $bytes; ++$offset) {
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
715
716
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty string is returned for an empty string. The literal "&#0;" is
     * replaced by an empty string before it is handed to self::ord(); every
     * other value is cast to string first. The formatting itself is done by
     * self::int_to_hex().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $input = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($input), $pfix);
    }
736
737
    /**
     * Alias of UTF8::chr_to_decimal(): forwards the argument and returns its
     * result.
     *
     * @param string $chr
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
     * Splits a string into smaller chunks and joins them with the given line
     * ending.
     *
     * The chunks come from self::str_split() and are glued together with
     * implode(), so $end is placed between chunks and not after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
     * Strips every byte that is not part of a well-formed UTF-8 sequence and
     * optionally applies further clean-up steps.
     *
     * The optional steps run in a fixed order: diamond question mark removal,
     * invisible character removal, whitespace normalization, MS Word
     * normalization, BOM removal.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; groups 2 and 3 catch
        // stray bytes. Replacing every match with '$1' keeps the former and
        // drops the latter.
        $validSequences = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($validSequences, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
832
833
    /**
     * Cleans up a string so that only printable UTF-8 characters remain and
     * fixes simple UTF-8 encoding errors.
     *
     * The input is cast to string; an empty result is returned right away.
     * Otherwise the string goes through self::fix_simple_utf8() and then
     * through self::clean() with every option enabled except the MS Word
     * normalization.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // Flags for self::clean(), in parameter order.
        $removeBom = true;
        $normalizeWhitespace = true;
        $normalizeMsWord = false;
        $keepNonBreakingSpace = true;
        $removeDiamondQuestionMark = true;
        $removeInvisibleCharacters = true;

        return self::clean(
            $fixed,
            $removeBom,
            $normalizeWhitespace,
            $normalizeMsWord,
            $keepNonBreakingSpace,
            $removeDiamondQuestionMark,
            $removeInvisibleCharacters
        );
    }
867
868
    /**
     * Turns a string, or an array of strings, into an array of Unicode code
     * points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string argument is split into characters with self::str_split()
     * first; an array argument is used as given. Every element is then mapped
     * through self::ord() and, for $u_style, additionally through
     * self::int_to_hex().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $chars);
        if ($codePoints === []) {
            return [];
        }

        if ($u_style !== true) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
912
913
    /**
     * Trims the string and squeezes every run of whitespace into a single space.
     *
     * Runs matching the POSIX class "[[:space:]]" are replaced, which covers tabs
     * and newlines as well as multibyte whitespace such as the thin space and the
     * ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the trimmed $str with condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        // without mbstring, fall back to the class' own regex helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
931
932
    /**
     * Counts how often each character occurs in a string.
     *
     * The string is split into single characters with UTF8::str_split() and the
     * resulting list is tallied with array_count_values().
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string
     *                                   (forwarded to UTF8::str_split()).</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Forwarded unchanged to UTF8::str_split().</p>
     *
     * @return int[] an associative array with the characters as keys and
     *               their number of occurrences as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // one array element per character
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($characters);
    }
956
957
    /**
     * Removes CSS media-query blocks ("@media ... { ... }") from a string.
     *
     * @param string $str <p>The CSS input.</p>
     *
     * @return string the input without the matched "@media" blocks; the result of
     *                preg_replace() is cast to string
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // case-insensitive, dot-matches-newline, UTF-8, multiline, ungreedy
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
972
973
    /**
     * Tells whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
983
984
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded with UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';

        return self::html_entity_decode($numericEntity, \ENT_QUOTES | \ENT_HTML5);
    }
995
996
    /**
     * Decodes a MIME header field.
     *
     * Uses iconv_mime_decode() when iconv support is flagged as available,
     * otherwise falls back to mb_decode_mimeheader().
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // the two most common names skip the normalization lookup
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // mbstring fallback: for non UTF-8 targets the input is re-encoded first
        $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($input);
    }
1022
1023
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, the reversible key set is used as search
     *                                           list, so that "emoji_encode" and "emoji_decode" mirror each
     *                                           other.</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the placeholder list that matches the mapping used while encoding
        $placeholders = $useReversibleStringMapping === true
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace($placeholders, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1051
1052
    /**
     * Encodes a string with emoji chars into a non-emoji string.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, the reversible key set is used as replacement
     *                                           list, so that "emoji_encode" and "emoji_decode" mirror each
     *                                           other.</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the placeholder list the emoji values are replaced with
        $placeholders = $useReversibleStringMapping === true
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $placeholders, $str);
    }
1080
1081
    /**
     * Encodes a string with a new charset-encoding.
     *
     * INFO:  Unlike "UTF8::utf8_encode()", this function also tries to fix broken / double encoding,
     *        so it can be called on a string that is already UTF-8 without messing it up.
     *
     * Besides real charsets, the pseudo-encodings 'JSON', 'BASE64' and 'HTML-ENTITIES'
     * are handled, both as source and as target.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>When true, the source encoding is always detected
     *                                       from the string; if that detection fails, the string is passed
     *                                       through UTF8::to_utf8() and returned.<br> When false, detection
     *                                       only runs if no $fromEncoding is set.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       An empty string triggers the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is 'JSON' and the string can not be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        // nothing to do without input or without a target
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // normalize both encoding names; the two most common names skip the lookup
        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }
        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // source and target are both known and identical
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // pseudo-encoding: JSON
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // pseudo-encoding: BASE64
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // pseudo-encoding: HTML-ENTITIES
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding if requested, or if none is known by now
        $detectedEncoding = ($autodetectFromEncoding === true || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        if ($detectedEncoding !== false) {
            $fromEncoding = $detectedEncoding;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // source still unknown, or already the target encoding
        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // dedicated converters for the single-byte <-> UTF-8 cases
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // any other target needs mbstring; warn if it is missing
        if (
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            if ($converted) {
                return $converted;
            }
        }

        // last resort: iconv, and if that fails too, the string as it is
        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        return $viaIconv !== false ? $viaIconv : $str;
    }
1239
1240
    /**
     * Encodes a string as a MIME header field via iconv_mime_encode().
     *
     * The field name passed to iconv_mime_encode() is always the empty string.
     *
     * @param string $str              <p>The field value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding (iconv "scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed (iconv "line-break-chars").</p>
     * @param int    $indent           [optional] <p>Set the max line length (iconv "line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // the two most common names skip the normalization lookup
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1280
1281
    /**
     * Creates an extract from a sentence; if the search-string is found, the extract is cut
     * around it so that the match ends up roughly centered.
     *
     * Cuts are only made at a space or a dot. If just one of these two delimiters
     * occurs after the cut offset, that one is used.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the extract, with $replacerForSkippedText marking skipped text;
     *                the unchanged $str if no cut position was found
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // characters stripped from the edges of the extract
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // --- no search term: cut the head of the text ---
        if ($search === '') {
            if ($length > 0) {
                if ($encoding === 'UTF-8') {
                    $stringLength = (int) \mb_strlen($str);
                } else {
                    $stringLength = (int) self::strlen($str, $encoding);
                }
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = self::extractTextBoundary($str, $end, $encoding);

            if ($pos) {
                if ($encoding === 'UTF-8') {
                    $strSub = \mb_substr($str, 0, $pos);
                } else {
                    $strSub = self::substr($str, 0, $pos, $encoding);
                }

                if ($strSub === false) {
                    return '';
                }

                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            }

            return $str;
        }

        // --- search term given: locate it and center the extract around it ---
        if ($encoding === 'UTF-8') {
            $wordPos = (int) \mb_stripos($str, $search);
            $halfSide = (int) ($wordPos - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
            $halfSide = (int) ($wordPos - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // start at the last space / dot in front of the centered window
        $pos_start = 0;
        if ($halfSide > 0) {
            if ($encoding === 'UTF-8') {
                $halfText = \mb_substr($str, 0, $halfSide);
            } else {
                $halfText = self::substr($str, 0, $halfSide, $encoding);
            }
            if ($halfText !== false) {
                // max() is safe here: a "not found" (false) never wins against a found position
                if ($encoding === 'UTF-8') {
                    $pos_start = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($wordPos && $halfSide > 0) {
            $offset = $pos_start + $length - 1;
            $realLength = (int) self::strlen($str, $encoding);

            if ($offset > $realLength) {
                $offset = $realLength;
            }

            $pos_end = self::extractTextBoundary($str, $offset, $encoding) - $pos_start;

            if ($pos_end <= 0) {
                // no usable end position: take everything from the start position on
                if ($encoding === 'UTF-8') {
                    $strSub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
                } else {
                    $strSub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
                }
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
                } else {
                    $extract = '';
                }
            } else {
                if ($encoding === 'UTF-8') {
                    $strSub = \mb_substr($str, $pos_start, $pos_end);
                } else {
                    $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
                }
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            }
        } else {
            // match at the very beginning (or not found): cut the head of the text
            $offset = $length - 1;
            $trueLength = (int) self::strlen($str, $encoding);

            if ($offset > $trueLength) {
                $offset = $trueLength;
            }

            $pos_end = self::extractTextBoundary($str, $offset, $encoding);

            if ($pos_end) {
                if ($encoding === 'UTF-8') {
                    $strSub = \mb_substr($str, 0, $pos_end);
                } else {
                    $strSub = self::substr($str, 0, $pos_end, $encoding);
                }
                if ($strSub !== false) {
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }

    /**
     * Finds the position of the first space or dot at or after $offset (helper for extract_text()).
     *
     * A plain min() over the two search results is not enough: a "not found" (false)
     * compares lower than any position, so the delimiter that was found would be lost.
     *
     * @param string $str      <p>The text to search in.</p>
     * @param int    $offset   <p>The character offset the search starts at.</p>
     * @param string $encoding <p>The already normalized encoding.</p>
     *
     * @return int the smaller of the found positions, or 0 if neither delimiter was found
     */
    private static function extractTextBoundary(string $str, int $offset, string $encoding): int
    {
        if ($encoding === 'UTF-8') {
            $spacePos = \mb_strpos($str, ' ', $offset);
            $dotPos = \mb_strpos($str, '.', $offset);
        } else {
            $spacePos = self::strpos($str, ' ', $offset, $encoding);
            $dotPos = self::strpos($str, '.', $offset, $encoding);
        }

        if ($spacePos === false) {
            // (int) false === 0 keeps the "nothing found" result of the previous min() logic
            return (int) $dotPos;
        }

        if ($dotPos === false) {
            return (int) $spacePos;
        }

        return (int) \min($spacePos, $dotPos);
    }
1468
1469
    /**
     * Reads an entire file into a string and, by default, converts the content to clean UTF-8.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed on as the second argument of the native
     *                                        file_get_contents() (include path search).
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is set,
     *                                        a context carrying the "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null means 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and is not UTF-16 / UTF-32) is not converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        An empty string triggers the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data as string or <b>false</b> on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the supported PHP versions
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // no context given: build one that only carries the http timeout
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // the length argument is only passed when it was really given
        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // binary data that is neither UTF-16 nor UTF-32 is handed back untouched
        $isPlainBinary = self::is_binary($data, true) === true
            && self::is_utf16($data, false) === false
            && self::is_utf32($data, false) === false;
        if ($isPlainBinary) {
            return $data;
        }

        $utf8 = self::encode('UTF-8', $data, false, $fromEncoding);

        return self::cleanup($utf8);
    }
1566
1567
    /**
     * Checks if a file starts with a BOM (Byte Order Mark) character.
     *
     * The whole file is read with the native file_get_contents() and then checked
     * with UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1586
1587
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized, every other
     * type is returned untouched.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>The normalization form handed to \Normalizer.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which starts
     *                                   with a combining mark.</p>
     *
     * @return mixed the filtered value, of the same kind as $var
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // containers: filter every element in place
        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // marker value: only tells the check below that normalization succeeded
            $n = '-';
        } else {
            $n = \Normalizer::normalize($var, $normalization_form);

            // no usable normalizer result: fall back to the own encoder
            $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            isset($n[0], $leading_combining[0])
            &&
            \preg_match('/^\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1651
1652
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it, and passes
     * the result through UTF8::filter().
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only forwarded when it was passed explicitly.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // forward $options only if the caller really supplied a fourth argument
        $value = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($value);
    }
1694
1695
    /**
1696
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1697
     *
1698
     * Gets external variables and optionally filters them
1699
     *
1700
     * @see http://php.net/manual/en/function.filter-input-array.php
1701
     *
1702
     * @param int   $type       <p>
1703
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1704
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1705
     *                          <b>INPUT_ENV</b>.
1706
     *                          </p>
1707
     * @param mixed $definition [optional] <p>
1708
     *                          An array defining the arguments. A valid key is a string
1709
     *                          containing a variable name and a valid value is either a filter type, or an array
1710
     *                          optionally specifying the filter, flags and options. If the value is an
1711
     *                          array, valid keys are filter which specifies the
1712
     *                          filter type,
1713
     *                          flags which specifies any flags that apply to the
1714
     *                          filter, and options which specifies any options that
1715
     *                          apply to the filter. See the example below for a better understanding.
1716
     *                          </p>
1717
     *                          <p>
1718
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1719
     *                          input array are filtered by this filter.
1720
     *                          </p>
1721
     * @param bool  $add_empty  [optional] <p>
1722
     *                          Add missing keys as <b>NULL</b> to the return value.
1723
     *                          </p>
1724
     *
1725
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1726
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1727
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1728
     *               is not set and <b>NULL</b> if the filter fails.
1729
     */
1730
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With only $type supplied, let the native function use its own
        // defaults; otherwise pass the definition and the add-empty flag along.
        $onlyTypeGiven = \func_num_args() < 2;

        $rawValues = $onlyTypeGiven
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        // hand the native result to the class' own filter() for normalization
        return self::filter($rawValues);
    }
1740
1741
    /**
1742
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1743
     *
1744
     * Filters a variable with a specified filter
1745
     *
1746
     * @see http://php.net/manual/en/function.filter-var.php
1747
     *
1748
     * @param mixed $variable <p>
1749
     *                        Value to filter.
1750
     *                        </p>
1751
     * @param int   $filter   [optional] <p>
1752
     *                        The ID of the filter to apply. The
1753
     *                        manual page lists the available filters.
1754
     *                        </p>
1755
     * @param mixed $options  [optional] <p>
1756
     *                        Associative array of options or bitwise disjunction of flags. If filter
1757
     *                        accepts options, flags can be provided in "flags" field of array. For
1758
     *                        the "callback" filter, callable type should be passed. The
1759
     *                        callback must accept one argument, the value to be filtered, and return
1760
     *                        the value after filtering/sanitizing it.
1761
     *                        </p>
1762
     *                        <p>
1763
     *                        <code>
1764
     *                        // for filters that accept options, use this format
1765
     *                        $options = array(
1766
     *                        'options' => array(
1767
     *                        'default' => 3, // value to return if the filter fails
1768
     *                        // other options here
1769
     *                        'min_range' => 0
1770
     *                        ),
1771
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1772
     *                        );
1773
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1774
     *                        // for filter that only accept flags, you can pass them directly
1775
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1776
     *                        // for filter that only accept flags, you can also pass as an array
1777
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1778
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1779
     *                        // callback validate filter
1780
     *                        function foo($value)
1781
     *                        {
1782
     *                        // Expected format: Surname, GivenNames
1783
     *                        if (strpos($value, ", ") === false) return false;
1784
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1785
     *                        $empty = (empty($surname) || empty($givennames));
1786
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1787
     *                        if ($empty || $notstrings) {
1788
     *                        return false;
1789
     *                        } else {
1790
     *                        return $value;
1791
     *                        }
1792
     *                        }
1793
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1794
     *                        </code>
1795
     *                        </p>
1796
     *
1797
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1798
     */
1799 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only when the caller really passed it, so the
        // native function keeps using its own default for that argument.
        $optionsWereGiven = \func_num_args() >= 3;

        $filtered = $optionsWereGiven
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        // hand the native result to the class' own filter() for normalization
        return self::filter($filtered);
    }
1809
1810
    /**
1811
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1812
     *
1813
     * Gets multiple variables and optionally filters them
1814
     *
1815
     * @see http://php.net/manual/en/function.filter-var-array.php
1816
     *
1817
     * @param array $data       <p>
1818
     *                          An array with string keys containing the data to filter.
1819
     *                          </p>
1820
     * @param mixed $definition [optional] <p>
1821
     *                          An array defining the arguments. A valid key is a string
1822
     *                          containing a variable name and a valid value is either a
1823
     *                          filter type, or an
1824
     *                          array optionally specifying the filter, flags and options.
1825
     *                          If the value is an array, valid keys are filter
1826
     *                          which specifies the filter type,
1827
     *                          flags which specifies any flags that apply to the
1828
     *                          filter, and options which specifies any options that
1829
     *                          apply to the filter. See the example below for a better understanding.
1830
     *                          </p>
1831
     *                          <p>
1832
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1833
     *                          input array are filtered by this filter.
1834
     *                          </p>
1835
     * @param bool  $add_empty  [optional] <p>
1836
     *                          Add missing keys as <b>NULL</b> to the return value.
1837
     *                          </p>
1838
     *
1839
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1840
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1841
     *               set
1842
     */
1843 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With only $data supplied, let the native function use its own
        // defaults; otherwise pass the definition and the add-empty flag along.
        $onlyDataGiven = \func_num_args() < 2;

        $filtered = $onlyDataGiven
            ? \filter_var_array($data)
            : \filter_var_array($data, $definition, $add_empty);

        // hand the native result to the class' own filter() for normalization
        return self::filter($filtered);
    }
1853
1854
    /**
1855
     * Checks whether finfo is available on the server.
1856
     *
1857
     * @return bool
1858
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1859
     */
1860
    public static function finfo_loaded(): bool
    {
        // the check is simply whether the "finfo" class can be resolved
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1864
1865
    /**
1866
     * Returns the first $n characters of the string.
1867
     *
1868
     * @param string $str      <p>The input string.</p>
1869
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1870
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1871
     *
1872
     * @return string
1873
     */
1874 13
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // nothing to take from an empty string or for a non-positive count
        $nothingToTake = $str === '' || $n <= 0;
        if ($nothingToTake) {
            return '';
        }

        // any other charset goes through the class' own substr()
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, 0, $n, $encoding);
        }

        // UTF-8 goes straight to mb_substr()
        return (string) \mb_substr($str, 0, $n);
    }
1886
1887
    /**
1888
     * Check if the number of unicode characters is not more than the specified integer.
1889
     *
1890
     * @param string $str      the original string to be checked
1891
     * @param int    $box_size the size in number of chars to be checked against string
1892
     *
1893
     * @return bool true if string is less than or equal to $box_size, false otherwise
1894
     */
1895 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // length as reported by the class' own strlen()
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1899
1900
    /**
1901
     * Try to fix simple broken UTF-8 strings.
1902
     *
1903
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1904
     *
1905
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1906
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1907
     * See: http://en.wikipedia.org/wiki/Windows-1252
1908
     *
1909
     * @param string $str <p>The input string</p>
1910
     *
1911
     * @return string
1912
     */
1913 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // search / replacement lists are built on first use and kept for later calls
        static $brokenSequences = null;
        static $fixedSequences = null;

        if ($brokenSequences === null) {
            // load the replacement table only if nothing has loaded it yet
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($brokenSequences, $fixedSequences, $str);
    }
1933
1934
    /**
1935
     * Fix a double (or multiple) encoded UTF8 string.
1936
     *
1937
     * @param string|string[] $str you can use a string or an array of strings
1938
     *
1939
     * @return string|string[]
1940
     *                         Will return the fixed input-"array" or
1941
     *                         the fixed input-"string"
1942
     *
1943
     * @psalm-suppress InvalidReturnType
1944
     */
1945 2
    public static function fix_utf8($str)
    {
        // arrays are fixed element by element (recursively), keys are kept
        if (\is_array($str) === true) {
            foreach ($str as &$item) {
                $item = self::fix_utf8($item);
            }
            // drop the reference so later writes cannot hit the last element
            unset($item);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // decode + re-encode until a pass no longer changes the string
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1976
1977
    /**
1978
     * Get the text direction of a specific character.
1979
     *
1980
     * @param string $char
1981
     *
1982
     * @return string 'RTL' or 'LTR'
1983
     */
1984 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltrCodes = [0, 11, 12, 20];
            $rtlCodes = [1, 13, 14, 15, 21];

            if (\in_array($intlDirection, $ltrCodes, true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, $rtlCodes, true)) {
                return 'RTL';
            }

            // any other code falls through to the table lookup below
        }

        $c = static::chr_to_decimal($char);

        // every right-to-left entry below lies inside 0x5be..0x10b7f
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        // single code points treated as right-to-left
        $rtlSingles = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtlSingles, true)) {
            return 'RTL';
        }

        // inclusive [first, last] code point ranges treated as right-to-left
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2090
2091
    /**
2092
     * Check for php-support.
2093
     *
2094
     * @param string|null $key
2095
     *
2096
     * @return mixed
2097
     *               Return the full support-"array", if $key === null<br>
2098
     *               return bool-value, if $key is used and available<br>
2099
     *               otherwise return <strong>null</strong>
2100
     */
2101 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // load the transliterator list only if nothing has loaded it yet
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // unknown keys yield null
            return self::$SUPPORT[$key] ?? null;
        }

        // no key: return the support array as it currently is
        return self::$SUPPORT;
    }
2115
2116
    /**
2117
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
2118
     *          if you need more supported types, please use e.g. "finfo"
2119
     *
2120
     * @param string $str
2121
     * @param array  $fallback with these keys: 'ext', 'mime', 'type'
2122
     *
2123
     * @return array
2124
     *               with these keys: 'ext', 'mime', 'type'
2125
     */
2126 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // the signature is the first two raw bytes; shorter input cannot match
        $magic = \substr($str, 0, 2);
        if ($magic === false || \strlen($magic) !== 2) {
            return $fallback;
        }

        // Known two-byte signatures => [extension, mime type].
        //
        // The bytes are compared directly. Joining their decimal values into
        // one number is ambiguous: 0x07 0xAD gives "7" . "173" = 7173, the
        // same number as "GI" (71, 73), so such data was reported as a GIF.
        $signatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xff\xd8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        if (!isset($signatures[$magic])) {
            return $fallback;
        }

        return [
            'ext'  => $signatures[$magic][0],
            'mime' => $signatures[$magic][1],
            // every recognised signature is reported as binary
            'type' => 'binary',
        ];
    }
2217
2218
    /**
2219
     * @param int    $length        <p>Length of the random string.</p>
2220
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2221
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2222
     *
2223
     * @return string
2224
     */
2225 1
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // init
        $picked = 0;
        $result = '';

        //
        // other charsets: length and slicing go through the class helpers
        //

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $poolSize = (int) self::strlen($possibleChars, $encoding);
            if ($poolSize === 0) {
                return '';
            }

            while ($picked < $length) {
                try {
                    $position = \random_int(0, $poolSize - 1);
                } catch (\Exception $e) {
                    // fall back to mt_rand() if random_int() throws
                    /** @noinspection RandomApiMigrationInspection */
                    $position = \mt_rand(0, $poolSize - 1);
                }

                $candidate = self::substr($possibleChars, $position, 1, $encoding);
                // only a successful slice counts towards the requested length
                if ($candidate !== false) {
                    $result .= $candidate;
                    ++$picked;
                }
            }

            return $result;
        }

        //
        // UTF-8: use the mb_* functions directly
        //

        $poolSize = (int) \mb_strlen($possibleChars);
        if ($poolSize === 0) {
            return '';
        }

        while ($picked < $length) {
            try {
                $position = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                // fall back to mt_rand() if random_int() throws
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $poolSize - 1);
            }

            $candidate = \mb_substr($possibleChars, $position, 1);
            // only a successful slice counts towards the requested length
            if ($candidate !== false) {
                $result .= $candidate;
                ++$picked;
            }
        }

        return $result;
    }
2279
2280
    /**
2281
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2282
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2283
     *
2284
     * @return string
2285
     */
2286 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // pieces that make up the prefix, joined in exactly this order
        $seedParts = [
            \random_int(0, \mt_getrandmax()),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ];
        $seed = \implode('', $seedParts);

        $unique = \uniqid($seed, true);

        // optionally hash the identifier together with its seed
        return $md5 ? \md5($unique . $seed) : $unique;
    }
2302
2303
    /**
2304
     * alias for "UTF8::string_has_bom()"
2305
     *
2306
     * @param string $str
2307
     *
2308
     * @return bool
2309
     *
2310
     * @see UTF8::string_has_bom()
2311
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2312
     */
2313 2
    public static function hasBom(string $str): bool
    {
        // deprecated alias: pass the input unchanged to string_has_bom()
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2317
2318
    /**
2319
     * Returns true if the string contains a lower case char, false otherwise.
2320
     *
2321
     * @param string $str <p>The input string.</p>
2322
     *
2323
     * @return bool whether or not the string contains a lower case character
2324
     */
2325 47
    public static function has_lowercase(string $str): bool
    {
        // anything, followed by one lower case character
        $pattern = '.*[[:lower:]]';

        // without mbstring, use the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2334
2335
    /**
2336
     * Returns true if the string contains an upper case char, false otherwise.
2337
     *
2338
     * @param string $str <p>The input string.</p>
2339
     *
2340
     * @return bool whether or not the string contains an upper case character
2341
     */
2342 12
    public static function has_uppercase(string $str): bool
    {
        // anything, followed by one upper case character
        $pattern = '.*[[:upper:]]';

        // without mbstring, use the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2351
2352
    /**
2353
     * Converts a hexadecimal-value into an UTF-8 character.
2354
     *
2355
     * @param string $hexdec <p>The hexadecimal value.</p>
2356
     *
2357
     * @return false|string one single UTF-8 character
2358
     */
2359 4
    public static function hex_to_chr(string $hexdec)
    {
        // hex string => number => character
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2363
2364
    /**
2365
     * Converts hexadecimal U+xxxx code point representation to integer.
2366
     *
2367
     * INFO: opposite to UTF8::int_to_hex()
2368
     *
2369
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2370
     *
2371
     * @return false|int the code point, or false on failure
2372
     */
2373 2
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Accepts "\uXXXX", "U+XXXX" or the bare digits (4 to 6 of them).
        // Only real hex digits are allowed: with "[a-zA-Z0-9]" an input such
        // as "zzzz" matched and intval() turned it into 0 instead of this
        // method reporting a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match) === 1) {
            return \intval($match[1], 16);
        }

        return false;
    }
2388
2389
    /**
2390
     * alias for "UTF8::html_entity_decode()"
2391
     *
2392
     * @param string $str
2393
     * @param int    $flags
2394
     * @param string $encoding
2395
     *
2396
     * @return string
2397
     *
2398
     * @see UTF8::html_entity_decode()
2399
     */
2400 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // plain alias: every argument is passed through unchanged
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2404
2405
    /**
2406
     * Converts a UTF-8 string to a series of HTML numbered entities.
2407
     *
2408
     * INFO: opposite to UTF8::html_decode()
2409
     *
2410
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2411
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2412
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2413
     *
2414
     * @return string HTML numbered entities
2415
     */
2416 14
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with $keepAsciiChars only code points from 0x80 upwards are converted
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A conversion map is a flat list of [start, end, offset, mask]
            // quadruples. The former fifth element was ignored by PHP 7, and
            // PHP 8 rejects maps whose length is not a multiple of four.
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2461
2462
    /**
2463
     * UTF-8 version of html_entity_decode()
2464
     *
2465
     * The reason we are not using html_entity_decode() by itself is because
2466
     * while it is not technically correct to leave out the semicolon
2467
     * at the end of an entity most browsers will still interpret the entity
2468
     * correctly. html_entity_decode() does not convert entities without
2469
     * semicolons, so we are left with our own little solution here. Bummer.
2470
     *
2471
     * Convert all HTML entities to their applicable characters
2472
     *
2473
     * INFO: opposite to UTF8::html_encode()
2474
     *
2475
     * @see http://php.net/manual/en/function.html-entity-decode.php
2476
     *
2477
     * @param string $str      <p>
2478
     *                         The input string.
2479
     *                         </p>
2480
     * @param int    $flags    [optional] <p>
2481
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2482
     *                         and which document type to use. If null is passed, ENT_QUOTES | ENT_HTML5 is used.
2483
     *                         <table>
2484
     *                         Available <i>flags</i> constants
2485
     *                         <tr valign="top">
2486
     *                         <td>Constant Name</td>
2487
     *                         <td>Description</td>
2488
     *                         </tr>
2489
     *                         <tr valign="top">
2490
     *                         <td><b>ENT_COMPAT</b></td>
2491
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2492
     *                         </tr>
2493
     *                         <tr valign="top">
2494
     *                         <td><b>ENT_QUOTES</b></td>
2495
     *                         <td>Will convert both double and single quotes.</td>
2496
     *                         </tr>
2497
     *                         <tr valign="top">
2498
     *                         <td><b>ENT_NOQUOTES</b></td>
2499
     *                         <td>Will leave both double and single quotes unconverted.</td>
2500
     *                         </tr>
2501
     *                         <tr valign="top">
2502
     *                         <td><b>ENT_HTML401</b></td>
2503
     *                         <td>
2504
     *                         Handle code as HTML 4.01.
2505
     *                         </td>
2506
     *                         </tr>
2507
     *                         <tr valign="top">
2508
     *                         <td><b>ENT_XML1</b></td>
2509
     *                         <td>
2510
     *                         Handle code as XML 1.
2511
     *                         </td>
2512
     *                         </tr>
2513
     *                         <tr valign="top">
2514
     *                         <td><b>ENT_XHTML</b></td>
2515
     *                         <td>
2516
     *                         Handle code as XHTML.
2517
     *                         </td>
2518
     *                         </tr>
2519
     *                         <tr valign="top">
2520
     *                         <td><b>ENT_HTML5</b></td>
2521
     *                         <td>
2522
     *                         Handle code as HTML 5.
2523
     *                         </td>
2524
     *                         </tr>
2525
     *                         </table>
2526
     *                         </p>
2527
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2528
     *
2529
     * @return string the decoded string
2530
     */
2531 46
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Fast exit: strings shorter than four bytes or without any "&" are returned untouched.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A conversion map is built from groups of exactly four integers
        // (start, end, offset, mask). A stray fifth element is rejected with a
        // ValueError by PHP 8, so only the four meaningful values are passed.
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // Decode repeatedly until a pass no longer changes the string,
        // so entities that only appear after a previous pass are decoded as well.
        do {
            $str_compare = $str;

            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity($str, $convmap);
                } else {
                    $str = \mb_decode_numericentity($str, $convmap, $encoding);
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes stay encoded in this step, the $flags decide about them further down
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (append the missing ";" so that html_entity_decode() accepts them)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2618
2619
    /**
     * Escape a string for HTML output by delegating to "UTF8::htmlspecialchars()".
     *
     * Both quote styles are converted (ENT_QUOTES) and invalid code unit sequences
     * are substituted instead of emptying the result (ENT_SUBSTITUTE).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2635
2636
    /**
     * Remove empty html-tag.
     *
     * e.g.: <tag></tag>
     *
     * A pair counts as empty when nothing or only whitespace sits between the
     * opening and the closing tag. The tag names of the pair are not compared.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without empty tags, or the unchanged input if the
     *                regular expression could not be applied (e.g. invalid UTF-8).</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/u",
            '',
            $str
        );

        // preg_replace() returns null on failure (the "u" modifier rejects invalid UTF-8);
        // hand back the input instead of silently turning it into an empty string.
        return $stripped ?? $str;
    }
2653
2654
    /**
2655
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2656
     *
2657
     * @see http://php.net/manual/en/function.htmlentities.php
2658
     *
2659
     * @param string $str           <p>
2660
     *                              The input string.
2661
     *                              </p>
2662
     * @param int    $flags         [optional] <p>
2663
     *                              A bitmask of one or more of the following flags, which specify how to handle
2664
     *                              quotes, invalid code unit sequences and the used document type. The default is
2665
     *                              ENT_COMPAT | ENT_HTML401.
2666
     *                              <table>
2667
     *                              Available <i>flags</i> constants
2668
     *                              <tr valign="top">
2669
     *                              <td>Constant Name</td>
2670
     *                              <td>Description</td>
2671
     *                              </tr>
2672
     *                              <tr valign="top">
2673
     *                              <td><b>ENT_COMPAT</b></td>
2674
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2675
     *                              </tr>
2676
     *                              <tr valign="top">
2677
     *                              <td><b>ENT_QUOTES</b></td>
2678
     *                              <td>Will convert both double and single quotes.</td>
2679
     *                              </tr>
2680
     *                              <tr valign="top">
2681
     *                              <td><b>ENT_NOQUOTES</b></td>
2682
     *                              <td>Will leave both double and single quotes unconverted.</td>
2683
     *                              </tr>
2684
     *                              <tr valign="top">
2685
     *                              <td><b>ENT_IGNORE</b></td>
2686
     *                              <td>
2687
     *                              Silently discard invalid code unit sequences instead of returning
2688
     *                              an empty string. Using this flag is discouraged as it
2689
     *                              may have security implications.
2690
     *                              </td>
2691
     *                              </tr>
2692
     *                              <tr valign="top">
2693
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2694
     *                              <td>
2695
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2696
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2697
     *                              string.
2698
     *                              </td>
2699
     *                              </tr>
2700
     *                              <tr valign="top">
2701
     *                              <td><b>ENT_DISALLOWED</b></td>
2702
     *                              <td>
2703
     *                              Replace invalid code points for the given document type with a
2704
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2705
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2706
     *                              instance, to ensure the well-formedness of XML documents with
2707
     *                              embedded external content.
2708
     *                              </td>
2709
     *                              </tr>
2710
     *                              <tr valign="top">
2711
     *                              <td><b>ENT_HTML401</b></td>
2712
     *                              <td>
2713
     *                              Handle code as HTML 4.01.
2714
     *                              </td>
2715
     *                              </tr>
2716
     *                              <tr valign="top">
2717
     *                              <td><b>ENT_XML1</b></td>
2718
     *                              <td>
2719
     *                              Handle code as XML 1.
2720
     *                              </td>
2721
     *                              </tr>
2722
     *                              <tr valign="top">
2723
     *                              <td><b>ENT_XHTML</b></td>
2724
     *                              <td>
2725
     *                              Handle code as XHTML.
2726
     *                              </td>
2727
     *                              </tr>
2728
     *                              <tr valign="top">
2729
     *                              <td><b>ENT_HTML5</b></td>
2730
     *                              <td>
2731
     *                              Handle code as HTML 5.
2732
     *                              </td>
2733
     *                              </tr>
2734
     *                              </table>
2735
     *                              </p>
2736
     * @param string $encoding      [optional] <p>
2737
     *                              Like <b>htmlspecialchars</b>,
2738
     *                              <b>htmlentities</b> takes an optional third argument
2739
     *                              <i>encoding</i> which defines encoding used in
2740
     *                              conversion.
2741
     *                              Although this argument is technically optional, you are highly
2742
     *                              encouraged to specify the correct value for your code.
2743
     *                              </p>
2744
     * @param bool   $double_encode [optional] <p>
2745
     *                              When <i>double_encode</i> is turned off PHP will not
2746
     *                              encode existing html entities. The default is to convert everything.
2747
     *                              </p>
2748
     *
2749
     * @return string
2750
     *                <p>
2751
     *                The encoded string.
2752
     *                <br><br>
2753
     *                If the input <i>string</i> contains an invalid code unit
2754
     *                sequence within the given <i>encoding</i> an empty string
2755
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2756
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2757
     *                </p>
2758
     */
2759 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given, every other name is normalized first.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP does not convert a backslash into its html entity, because the backslash
         * is mostly used for escaping characters before they are inserted into a database.
         * That kind of escaping is not needed here, so every backslash is replaced
         * by its html entity equivalent.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // finally convert the remaining non-ASCII chars into numbered entities (ASCII chars are kept)
        return self::html_encode($encoded, true, $encoding);
    }
2783
2784
    /**
2785
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2786
     *
2787
     * INFO: Take a look at "UTF8::htmlentities()"
2788
     *
2789
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2790
     *
2791
     * @param string $str           <p>
2792
     *                              The string being converted.
2793
     *                              </p>
2794
     * @param int    $flags         [optional] <p>
2795
     *                              A bitmask of one or more of the following flags, which specify how to handle
2796
     *                              quotes, invalid code unit sequences and the used document type. The default is
2797
     *                              ENT_COMPAT | ENT_HTML401.
2798
     *                              <table>
2799
     *                              Available <i>flags</i> constants
2800
     *                              <tr valign="top">
2801
     *                              <td>Constant Name</td>
2802
     *                              <td>Description</td>
2803
     *                              </tr>
2804
     *                              <tr valign="top">
2805
     *                              <td><b>ENT_COMPAT</b></td>
2806
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2807
     *                              </tr>
2808
     *                              <tr valign="top">
2809
     *                              <td><b>ENT_QUOTES</b></td>
2810
     *                              <td>Will convert both double and single quotes.</td>
2811
     *                              </tr>
2812
     *                              <tr valign="top">
2813
     *                              <td><b>ENT_NOQUOTES</b></td>
2814
     *                              <td>Will leave both double and single quotes unconverted.</td>
2815
     *                              </tr>
2816
     *                              <tr valign="top">
2817
     *                              <td><b>ENT_IGNORE</b></td>
2818
     *                              <td>
2819
     *                              Silently discard invalid code unit sequences instead of returning
2820
     *                              an empty string. Using this flag is discouraged as it
2821
     *                              may have security implications.
2822
     *                              </td>
2823
     *                              </tr>
2824
     *                              <tr valign="top">
2825
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2826
     *                              <td>
2827
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2828
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2829
     *                              string.
2830
     *                              </td>
2831
     *                              </tr>
2832
     *                              <tr valign="top">
2833
     *                              <td><b>ENT_DISALLOWED</b></td>
2834
     *                              <td>
2835
     *                              Replace invalid code points for the given document type with a
2836
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2837
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2838
     *                              instance, to ensure the well-formedness of XML documents with
2839
     *                              embedded external content.
2840
     *                              </td>
2841
     *                              </tr>
2842
     *                              <tr valign="top">
2843
     *                              <td><b>ENT_HTML401</b></td>
2844
     *                              <td>
2845
     *                              Handle code as HTML 4.01.
2846
     *                              </td>
2847
     *                              </tr>
2848
     *                              <tr valign="top">
2849
     *                              <td><b>ENT_XML1</b></td>
2850
     *                              <td>
2851
     *                              Handle code as XML 1.
2852
     *                              </td>
2853
     *                              </tr>
2854
     *                              <tr valign="top">
2855
     *                              <td><b>ENT_XHTML</b></td>
2856
     *                              <td>
2857
     *                              Handle code as XHTML.
2858
     *                              </td>
2859
     *                              </tr>
2860
     *                              <tr valign="top">
2861
     *                              <td><b>ENT_HTML5</b></td>
2862
     *                              <td>
2863
     *                              Handle code as HTML 5.
2864
     *                              </td>
2865
     *                              </tr>
2866
     *                              </table>
2867
     *                              </p>
2868
     * @param string $encoding      [optional] <p>
2869
     *                              Defines encoding used in conversion.
2870
     *                              </p>
2871
     *                              <p>
2872
     *                              For the purposes of this function, the encodings
2873
     *                              ISO-8859-1, ISO-8859-15,
2874
     *                              UTF-8, cp866,
2875
     *                              cp1251, cp1252, and
2876
     *                              KOI8-R are effectively equivalent, provided the
2877
     *                              <i>string</i> itself is valid for the encoding, as
2878
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2879
     *                              the same positions in all of these encodings.
2880
     *                              </p>
2881
     * @param bool   $double_encode [optional] <p>
2882
     *                              When <i>double_encode</i> is turned off PHP will not
2883
     *                              encode existing html entities, the default is to convert everything.
2884
     *                              </p>
2885
     *
2886
     * @return string the converted string.
2887
     *                </p>
2888
     *                <p>
2889
     *                If the input <i>string</i> contains an invalid code unit
2890
     *                sequence within the given <i>encoding</i> an empty string
2891
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2892
     *                <b>ENT_SUBSTITUTE</b> flags are set
2893
     */
2894 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given, every other name is normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2906
2907
    /**
     * Tells whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $hasIconv = \extension_loaded('iconv');

        return $hasIconv;
    }
2917
2918
    /**
     * Alias that forwards to "UTF8::decimal_to_chr()".
     *
     * @param mixed $int <p>Passed through unchanged.</p>
     *
     * @return string <p>Whatever "UTF8::decimal_to_chr()" returns for $int.</p>
     *
     * @see UTF8::decimal_to_chr()
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2931
2932
    /**
     * Converts an integer into a prefixed hexadecimal code point representation, e.g. "U+0041".
     *
     * The hexadecimal digits come from dechex() and are left-padded with zeros
     * to a minimum of four digits.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>Text placed in front of the hexadecimal digits.</p>
     *
     * @return string the prefix followed by at least four hexadecimal digits
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // shorter values are zero-padded on the left, longer ones are kept as they are
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $digits;
    }
2950
2951
    /**
     * Tells whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2961
2962
    /**
     * Tells whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $hasIntl = \extension_loaded('intl');

        return $hasIntl;
    }
2972
2973
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_ascii()".
     *
     * @param string $str <p>Passed through unchanged.</p>
     *
     * @return bool <p>Whatever "UTF8::is_ascii()" returns for $str.</p>
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $result = self::is_ascii($str);

        return $result;
    }
2987
2988
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_base64()".
     *
     * @param string $str <p>Passed through unchanged.</p>
     *
     * @return bool <p>Whatever "UTF8::is_base64()" returns for $str.</p>
     *
     * @see UTF8::is_base64()
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $result = self::is_base64($str);

        return $result;
    }
3002
3003
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_binary()".
     *
     * @param mixed $str    <p>Passed through unchanged.</p>
     * @param bool  $strict <p>Passed through unchanged.</p>
     *
     * @return bool <p>Whatever "UTF8::is_binary()" returns for the given arguments.</p>
     *
     * @see UTF8::is_binary()
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $result = self::is_binary($str, $strict);

        return $result;
    }
3018
3019
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>Passed through unchanged.</p>
     *
     * @return bool <p>Whatever "UTF8::is_bom()" returns for $utf8_chr.</p>
     *
     * @see UTF8::is_bom()
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $result = self::is_bom($utf8_chr);

        return $result;
    }
3033
3034
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_html()".
     *
     * @param string $str <p>Passed through unchanged.</p>
     *
     * @return bool <p>Whatever "UTF8::is_html()" returns for $str.</p>
     *
     * @see UTF8::is_html()
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
3048
3049
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_json()".
     *
     * @param string $str <p>Passed through unchanged.</p>
     *
     * @return bool <p>Whatever "UTF8::is_json()" returns for $str.</p>
     *
     * @see UTF8::is_json()
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
3063
3064
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_utf16()".
     *
     * @param mixed $str <p>Passed through unchanged.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3081
3082
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_utf32()".
     *
     * @param mixed $str <p>Passed through unchanged.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3099
3100
    /**
     * Deprecated camelCase alias that forwards to "UTF8::is_utf8()".
     *
     * @param string $str    <p>Passed through unchanged.</p>
     * @param bool   $strict <p>Passed through unchanged.</p>
     *
     * @return bool <p>Whatever "UTF8::is_utf8()" returns for the given arguments.</p>
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3115
3116
    /**
     * Checks that the whole string matches "^[[:alpha:]]*$", i.e. it consists of alphabetic chars only.
     *
     * The check runs through mb_ereg_match() when mbstring support is flagged,
     * otherwise through "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3133
3134
    /**
     * Checks that the whole string matches "^[[:alnum:]]*$", i.e. it consists of alphabetic and numeric chars only.
     *
     * The check runs through mb_ereg_match() when mbstring support is flagged,
     * otherwise through "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3151
3152
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * Accepted bytes are tab (0x09), line feed (0x0A), carriage return (0x0D),
     * 0x10, 0x13 and the printable range 0x20 - 0x7E; any other byte makes the check fail.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if only accepted bytes occur (or the string is empty)<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        // NOTE(review): "\x10" and "\x13" look like decimal 10 / 13 (LF / CR) written as hex;
        // kept as-is because removing them would change the result for such input - confirm the intent.
        $hasOtherByte = $str !== '' && \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

        return !$hasOtherByte;
    }
3169
3170
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input counts as base64 when it can be strictly decoded and
     * re-encoding the decoded data yields exactly the input again.
     *
     * @param mixed|string $str                <p>The input string, non-string values are never valid.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // only a lossless round trip proves that the input is canonical base64
        return \base64_encode($decoded) === $str;
    }
3195
3196
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The input is cast to string and then tested in this order:
     * empty string (never binary), a string of only "0" / "1" chars,
     * the type reported by "UTF8::get_file_type()", a share of null bytes above 25%
     * and - in strict mode only - the mime encoding reported by "finfo".
     *
     * @param mixed $input  <p>The value to check, it is cast to string.</p>
     * @param bool  $strict [optional] <p>Also ask "finfo" for the mime encoding.</p>
     *
     * @throws \RuntimeException if strict mode is requested but fileinfo support is flagged as missing
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // a sequence of "0" and "1" chars counts as binary notation
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        $fileType = self::get_file_type($data);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // more than a quarter null bytes is treated as binary
        $byteCount = \strlen($data);
        $nullByteCount = \substr_count($data, "\x0", 0, $byteCount);
        if (($nullByteCount / $byteCount) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $mimeEncoding && $mimeEncoding === 'binary';
    }
3240
3241
    /**
     * Checks whether a file looks binary, judged by its first 512 bytes.
     *
     * A file that cannot be opened, or whose first read is empty, is reported as not binary.
     *
     * @param string $file <p>Path of the file to inspect.</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (\is_resource($handle) === false) {
            return false;
        }

        // only a small head of the file is inspected
        $firstBytes = \fread($handle, 512);
        \fclose($handle);

        if ($firstBytes === '') {
            return false;
        }

        return self::is_binary($firstBytes, true);
    }
3265
3266
    /**
     * Tells whether the string consists only of whitespace chars (an empty string matches, too).
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // prefer the mbstring regex engine, otherwise use the library's own matcher
        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3283
3284
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The whole input must be identical (strict comparison) to one of the keys of self::$BOM.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Only the keys are needed. The previous by-reference foreach over the static
        // array never wrote through the reference and never released it.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3305
3306
    /**
     * Determine whether the value is considered to be empty.
     *
     * The parameter always exists here, so the result is simply whether the value
     * is falsy under PHP's boolean conversion ('', '0', 0, null, [], false, ...).
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        return !$str;
    }
3320
3321
    /**
     * Tells whether the string consists only of hexadecimal chars (an empty string matches, too).
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // prefer the mbstring regex engine, otherwise use the library's own matcher
        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3338
3339
    /**
     * Check if the string contains at least one html-tag, e.g. "<b>", "</b>" or "<br/>".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // opening or closing tag, optionally with attributes and a self-closing slash
        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/u";

        // anything other than exactly one match (no match / regex error) means "no tag found"
        return \preg_match($tagPattern, $str) === 1;
    }
3359
3360
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via self::json_decode(); it is accepted when decoding
     * left no json error and (by default) the result is an array or an object.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a decoded null is only plausible when the input itself spells "null"
        $spellsNull = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$spellsNull) {
            return false;
        }

        $isContainer = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isContainer) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3396
3397
    /**
     * Tells whether the string consists only of lower case chars (an empty string matches, too).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // prefer the mbstring regex engine, otherwise use the library's own matcher
        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3411
3412
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by actually unserializing the string. Object instantiation is
     * disabled ("allowed_classes" => false), so serialized objects are turned into
     * "__PHP_Incomplete_Class" placeholders instead of real instances and no
     * "__wakeup()" / "__destruct()" code of application classes can be triggered
     * by untrusted input. The boolean result is unaffected by that option.
     *
     * @param string $str
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of false, which the generic check below
        // cannot distinguish from an unserialize() failure
        if ($str === 'b:0;') {
            return true;
        }

        // unserialize() reports malformed input via a notice/warning; only the result matters here
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3431
3432
    /**
     * Tells whether the string consists only of upper case chars (an empty string matches, too).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // prefer the mbstring regex engine, otherwise use the library's own matcher
        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3450
3451
    /**
     * Check if the string is UTF-16.
     *
     * Heuristic: for each byte order (LE first, then BE) the input - with its BOM removed -
     * is converted to UTF-8, back to UTF-16 and to UTF-8 again. If that round trip is
     * stable, every key of self::count_chars() of the converted text that is found
     * (strict in_array()) in self::count_chars() of the input scores one point for that
     * byte order. The byte order with more points wins; a tie means "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if self::is_binary() (strict) does not
     *                                     consider the input binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared by both byte orders
            if ($strChars === []) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are inspected, so iterate by value: a function result
            // cannot be used as a by-reference foreach target.
            foreach (self::count_chars($test3) as $test3char => $unusedCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score[$candidate];
                }
            }
        }

        if ($score['UTF-16LE'] === $score['UTF-16BE']) {
            return false;
        }

        return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
    }
3528
3529
    /**
     * Check if the string is UTF-32.
     *
     * Heuristic: for each byte order (LE first, then BE) the input - with its BOM removed -
     * is converted to UTF-8, back to UTF-32 and to UTF-8 again. If that round trip is
     * stable, every key of self::count_chars() of the converted text that is found
     * (strict in_array()) in self::count_chars() of the input scores one point for that
     * byte order. The byte order with more points wins; a tie means "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if self::is_binary() (strict) does not
     *                                     consider the input binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared by both byte orders
            if ($strChars === []) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are inspected, so iterate by value: a function result
            // cannot be used as a by-reference foreach target.
            foreach (self::count_chars($test3) as $test3char => $unusedCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score[$candidate];
                }
            }
        }

        if ($score['UTF-32LE'] === $score['UTF-32BE']) {
            return false;
        }

        return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
    }
3606
3607
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * An array is valid when every element is valid. An empty string is valid.
     * Overlong encodings, 5 and 6 octet sequences, surrogates, code points above
     * U+10FFFF and sequences that are cut off at the end of the string are invalid.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // read-only iteration, no reference needed
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut relies on the /u modifier but is taken when
        // pcre_utf8_support() is NOT true - the condition looks inverted, confirm the intent.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen((string) $str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state here means the string ended in the middle of a
        // multi-octet sequence (e.g. a lone lead byte), which is not valid UTF-8.
        return $mState === 0;
    }
3762
3763
    /**
     * Decodes a JSON string, after passing it through self::filter().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        The native decoder only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, handed to the native function unchanged.
     *                        </p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filteredJson, $assoc, $depth, $options);

        return $decoded;
    }
3813
3814
    /**
     * Returns the JSON representation of a value, after passing it through self::filter().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of the native JSON encode constants, e.g.
     *                       <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>; handed to the native function unchanged.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filteredValue, $options, $depth);

        return $encoded;
    }
3863
3864
    /**
     * Checks whether JSON is available on the server, i.e. whether "json_decode()" exists.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $decoderExists = \function_exists('json_decode');

        return $decoderExists;
    }
3874
3875
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without a language and without length preservation the plain
     * mbstring functions are used; every other combination goes through
     * self::strtolower() (and, for other encodings, self::substr()).
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // any encoding other than the exact string "UTF-8" takes the generic path
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        if ($lang === null && $tryToKeepStringLength === false) {
            // no special casing rules requested
            $head = \mb_strtolower($firstChar);
        } else {
            $head = self::strtolower(
                $firstChar,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $head . $tail;
    }
3931
3932
    /**
     * Alias for "UTF8::lcfirst()": every argument is forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCased = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowerCased;
    }
3954
3955
    /**
     * Lowercases the first character of every word in the string.
     *
     * The string is split via UTF8::str_to_words(), each chunk that is not listed in
     * $exceptions is passed through UTF8::lcfirst(), and the chunks are glued back together.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that must be left untouched.</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare with '' explicitly: a loose "!$str" check would also swallow the valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // Write back by index instead of iterating by reference, so no dangling reference is left behind.
        foreach ($words as $index => $word) {
            // Falsy chunks ('' and "0") have nothing to lowercase.
            if (!$word) {
                continue;
            }

            if ($useExceptions === true && \in_array($word, $exceptions, true)) {
                continue;
            }

            $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        return \implode('', $words);
    }
4001
4002
    /**
     * Alias for "UTF8::lcfirst()": every argument is forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCased = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowerCased;
    }
4024
4025
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or '' strips whitespace</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check, so that a char list of "0" is honoured
        // (a loose "if ($chars)" treats "0" like "no chars given" and trims whitespace instead).
        if ($chars !== null && $chars !== '') {
            // Quote the chars so they are taken literally inside the character class.
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '^[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback without mbstring: same pattern through PCRE with "/" as delimiter.
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4053
4054
    /**
     * Returns the UTF-8 character with the highest code point in the given data.
     *
     * An array argument is concatenated into one string before the code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code point was found
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if ($codepoints === []) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
4076
4077
    /**
     * Returns the largest entry of UTF8::chr_size_list() for the given string,
     * i.e. the maximum number of bytes taken by any of its characters.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 when the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4094
4095
    /**
     * Reports whether the "mbstring" extension is loaded on this server.
     *
     * @return bool
     *              <strong>true</strong> if loaded, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4105
4106
    /**
     * Returns the UTF-8 character with the lowest code point in the given data.
     *
     * An array argument is concatenated into one string before the code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code point was found
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if ($codepoints === []) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
4128
4129
    /**
     * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4144
4145
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: empty input -> fallback; a few hard-wired names; the per-request cache;
     * the list of known encodings; finally an alias table that is matched against the
     * upper-cased name with every char except letters, digits and whitespace removed.
     * A name that is not found anywhere is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $NORMALIZED_CACHE = [];

        // init
        $encoding = (string) $encoding;

        if (!$encoding) {
            return $fallback;
        }

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($NORMALIZED_CACHE[$encoding])) {
            return $NORMALIZED_CACHE[$encoding];
        }

        // The list of known encodings is loaded lazily on first use.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $NORMALIZED_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encodingOrig = $encoding;
        $encoding = \strtoupper($encoding);
        $lookupKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/u', '', $encoding);

        $equivalences = [
            // ISO-8859 family
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            // Windows code pages
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            // Unicode transformation formats
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            // binary-ish names
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown aliases keep the upper-cased name.
        $normalized = $equivalences[$lookupKey] ?? $encoding;

        // The cache is keyed by the name exactly as the caller passed it.
        $NORMALIZED_CACHE[$encodingOrig] = $normalized;

        return $normalized;
    }
4288
4289
    /**
     * Converts Windows ("\r\n") and old Mac ("\r") line endings to the unix-like "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // CRLF has to be handled first, otherwise its "\r" would become an extra line break.
        $withoutCrLf = \str_replace("\r\n", "\n", $str);

        return \str_replace("\r", "\n", $withoutCrLf);
    }
4300
4301
    /**
     * Replaces typographic quotes, dashes and the ellipsis (as produced by e.g. MS Word)
     * with their plain ASCII counterparts.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );
    }
4352
4353
    /**
     * Normalize the whitespace.
     *
     * Every entry of the class' whitespace table is replaced by a plain space; unless
     * $keepBidiUnicodeControls is set, every entry of the bidi-control table is removed first.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // Prepared search lists, kept for the lifetime of the request.
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        // One cached list per variant: 0 = replace NBSP too, 1 = keep NBSP.
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4394
4395
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Results are cached per character + encoding. Lookup order: the pre-built "ord" table,
     * then "IntlChar" (when supported), then manual decoding of the leading bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CODE_POINT_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cacheKey = $chr . $encoding;
        if (isset($CODE_POINT_CACHE[$cacheKey]) === true) {
            return $CODE_POINT_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // The lookup table is loaded lazily on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CODE_POINT_CACHE[$cacheKey] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            // A falsy result falls through to the manual decoding below.
            if ($intlCode) {
                return $CODE_POINT_CACHE[$cacheKey] = $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the (at most four) leading bytes
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $code = $bytes ? $bytes[1] : 0;

        if ($code >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code = (($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($code >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code = (($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($code >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code = (($code - 0xC0) << 6) + $bytes[2] - 0x80;
        }

        // Otherwise the first byte (or 0) is used as-is.
        return $CODE_POINT_CACHE[$cacheKey] = (int) $code;
    }
4473
4474
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method never sets variables in the current scope;
     *          the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            // Without mbstring: native parser; success is judged by the result alone.
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);
        if ($parsed === false) {
            return false;
        }

        return $result !== [];
    }
4506
4507
    /**
     * Checks if the "/u" pattern modifier, which enables Unicode support in PCRE, is available.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // An empty pattern with the "u" modifier is matched; warnings are silenced on purpose.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4519
4520
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is interpreted in this order: decimal digits, hexadecimal digits
     * (via UTF8::hex_to_int()), otherwise as a character (via UTF8::ord()).
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not available
     *
     * @return string[] empty when a boundary is falsy or resolves to code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Shared conversion of one boundary value into a code point.
        $toCodePoint = static function ($boundary): int {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $boundary)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($boundary)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        $chars = [];
        foreach (\range($start, $end) as $codePoint) {
            $chars[] = (string) self::chr($codePoint);
        }

        return $chars;
    }
4571
4572
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without any of these markers there is nothing to decode.
        $needsDecoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needsDecoding = true;

                break;
            }
        }

        if ($needsDecoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Repeat until a pass changes nothing (or only once when $multi_decode is off).
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $withoutEntities = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4629
4630
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always run with the "u" modifier; the legacy option string "msr" is reduced to "ms".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4663
4664
    /**
     * Alias for "UTF8::remove_bom()": the argument is forwarded unchanged.
     *
     * @param string $str
     *
     * @return string
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4678
4679
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the class' BOM table is tried in table order; each one found at the very
     * start of the (already shortened) string is cut off by its registered byte length.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingLength = \strlen($str);

        foreach (self::$BOM as $bomString => $bomByteLength) {
            if (\strpos($str, $bomString, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bomByteLength, $remainingLength);
            if ($stripped === false) {
                return '';
            }

            $remainingLength -= (int) $bomByteLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4708
4709
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of one or more consecutive repetitions of an item is collapsed into a single item.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String (or list of strings) to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // Anything that is neither a string nor an array is ignored and the input is returned as-is.
        if (\is_array($what) === false) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            // The item is inserted through a callback, not as a replacement string: in a replacement
            // string, sequences like "$0" or "\0" inside the item are read as back-references and
            // would re-insert the whole matched run instead of a single item.
            $str = (string) \preg_replace_callback(
                '/(' . \preg_quote($item, '/') . ')+/u',
                static function (array $match) use ($item): string {
                    return $item;
                },
                $str
            );
        }

        return $str;
    }
4732
4733
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, in the format
     *                              "strip_tags()" expects. Default: '' (strip every tag)</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4747
4748
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // No leading "/" before "\r\n": "#" is the delimiter here, so a slash would be part of the
        // pattern and only match (and delete) a literal "/" in front of a line break.
        // Modifiers: i = case-insensitive "<BR>", s = dot matches newlines, U = ungreedy.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4760
4761
    /**
4762
     * Remove invisible characters from a string.
4763
     *
4764
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
4765
     *
4766
     * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4767
     *
4768
     * @param string $str
4769
     * @param bool   $url_encoded
4770
     * @param string $replacement
4771
     *
4772
     * @return string
4773
     */
4774
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // Raw control bytes 00-08, 11, 12, 14-31 and 127. Horizontal tab (09),
        // newline (10) and carriage return (13) are deliberately left alone.
        $rawControlPattern = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S';

        if ($url_encoded) {
            $patterns = [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
                $rawControlPattern,
            ];
        } else {
            $patterns = [$rawControlPattern];
        }

        // Keep going until a full pass replaces nothing: removing one sequence can
        // glue its neighbours into a new one (e.g. "%0%000" becomes "%00").
        $replacedInPass = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replacedInPass);
        } while ($replacedInPass !== 0);

        return $str;
    }
4794
4795
    /**
4796
     * Returns a new string with the prefix $substring removed, if present.
4797
     *
4798
     * @param string $str
4799
     * @param string $substring <p>The prefix to remove.</p>
4800
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
4801
     *
4802
     * @return string string without the prefix $substring
4803
     */
4804
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a plain truthiness test treats the prefix "0"
        // as empty and would never remove it. The prefix test itself is byte-wise.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // Fast path: cut off as many characters as the prefix is long.
            // No encoding is passed here, so mbstring's configured internal encoding applies.
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // Same cut for any other encoding, via the class's own encoding-aware helpers.
        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4826
4827
    /**
4828
     * Returns a new string with the suffix $substring removed, if present.
4829
     *
4830
     * @param string $str
4831
     * @param string $substring <p>The suffix to remove.</p>
4832
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
4833
     *
4834
     * @return string string without the suffix $substring
4835
     */
4836
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a plain truthiness test treats the suffix "0"
        // as empty and would never remove it. The suffix test itself is byte-wise.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // Fast path: keep everything except the last "length of suffix" characters.
            // No encoding is passed here, so mbstring's configured internal encoding applies.
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // Same cut for any other encoding, via the class's own encoding-aware helpers.
        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4859
4860
    /**
4861
     * Replaces all occurrences of $search in $str by $replacement.
4862
     *
4863
     * @param string $str           <p>The input string.</p>
4864
     * @param string $search        <p>The needle to search for.</p>
4865
     * @param string $replacement   <p>The string to replace with.</p>
4866
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4867
     *
4868
     * @return string string after the replacements
4869
     */
4870
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        if (!$caseSensitive) {
            // Case-insensitive replacement is handled by this class's str_ireplace().
            return self::str_ireplace($search, $replacement, $str);
        }

        // Case-sensitive replacement: native byte-wise str_replace().
        return \str_replace($search, $replacement, $str);
    }
4882
4883
    /**
4884
     * Replaces all occurrences of $search in $str by $replacement.
4885
     *
4886
     * @param string       $str           <p>The input string.</p>
4887
     * @param array        $search        <p>The elements to search for.</p>
4888
     * @param array|string $replacement   <p>The string to replace with.</p>
4889
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4890
     *
4891
     * @return string string after the replacements
4892
     */
4893
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        if (!$caseSensitive) {
            // Case-insensitive replacement is handled by this class's str_ireplace().
            return self::str_ireplace($search, $replacement, $str);
        }

        // Case-sensitive replacement of every search element via native str_replace().
        return \str_replace($search, $replacement, $str);
    }
4905
4906
    /**
4907
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4908
     *
4909
     * @param string $str                <p>The input string</p>
4910
     * @param string $replacementChar    <p>The replacement character.</p>
4911
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
4912
     *
4913
     * @return string
4914
     */
4915
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // An empty replacement is passed to mb_substitute_character() as the keyword "none".
            $substitute = $replacementChar === '' ? 'none' : $replacementChar;

            // Swap the substitute character only for this one conversion, then restore the old setting.
            $previousSubstitute = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previousSubstitute);
        }

        // Finally replace the replacement character itself (given once as bytes, once as a literal).
        $diamonds = ["\xEF\xBF\xBD", '�'];

        return \str_replace($diamonds, $replacementChar, $str);
    }
4952
4953
    /**
4954
     * Strip whitespace or other characters from end of a UTF-8 string.
4955
     *
4956
     * @param string      $str   <p>The string to be trimmed.</p>
4957
     * @param string|null $chars <p>Optional characters to be stripped.</p>
4958
     *
4959
     * @return string the string with unwanted characters stripped from the right
4960
     */
4961
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Check for null / '' explicitly: a truthiness test would treat the character
        // list "0" as "not given" and silently fall back to trimming whitespace.
        if ($chars !== null && $chars !== '') {
            // Quote the characters so they are taken literally inside the character class.
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Without mbstring, fall back to the class's own regex helper ("/" as delimiter).
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4981
4982
    /**
4983
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
4984
     *
4985
     * @psalm-suppress MissingReturnType
4986
     */
4987
    public static function showSupport()
    {
        // Debug output only: prints each entry of the support table as "key - value".
        echo '<pre>';
        // Iterate by value: nothing is modified, so no reference into the shared
        // static array is needed (and none has to be unset afterwards).
        foreach (self::$SUPPORT as $key => $value) {
            echo $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        echo '</pre>';
    }
4996
4997
    /**
4998
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4999
     *
5000
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
5001
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
5002
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
5003
     *
5004
     * @return string the HTML numbered entity
5005
     */
5006
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        // Nothing to encode.
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched, but only when the caller asked for it.
        $keepUnchanged = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($keepUnchanged) {
            return $char;
        }

        // Build the numbered entity from the value returned by ord().
        $codePoint = self::ord($char, $encoding);

        return "&#{$codePoint};";
    }
5022
5023
    /**
5024
     * @param string $str
5025
     * @param int    $tabLength
5026
     *
5027
     * @return string
5028
     */
5029
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // A run of $tabLength spaces stands for one tab; str_repeat() produces exactly
        // the same strings as hard-coded 4-space and 2-space literals would.
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5041
5042
    /**
5043
     * alias for "UTF8::str_split()"
5044
     *
5045
     * @param string|string[] $str
5046
     * @param int             $length
5047
     * @param bool            $cleanUtf8
5048
     *
5049
     * @return string[]
5050
     *
5051
     * @see UTF8::str_split()
5052
     */
5053
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        // Pure alias: every argument is forwarded unchanged to str_split().
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
5060
5061
    /**
5062
     * alias for "UTF8::str_starts_with()"
5063
     *
5064
     * @param string $haystack
5065
     * @param string $needle
5066
     *
5067
     * @return bool
5068
     *
5069
     * @see UTF8::str_starts_with()
5070
     */
5071
    public static function str_begins(string $haystack, string $needle): bool
    {
        // Pure alias: forwards both arguments to str_starts_with().
        $startsWith = self::str_starts_with($haystack, $needle);

        return $startsWith;
    }
5075
5076
    /**
5077
     * Returns a camelCase version of the string. Trims surrounding spaces,
5078
     * capitalizes letters following digits, spaces, dashes and underscores,
5079
     * and removes spaces, dashes, as well as underscores.
5080
     *
5081
     * @param string      $str                   <p>The input string.</p>
5082
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
5083
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
5084
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5085
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
5086
     *
5087
     * @return string
5088
     */
5089
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        $needsNormalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Lower-case the first character of the trimmed input, then drop leading dashes / underscores.
        $camel = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);
        $camel = (string) \preg_replace('/^[-_]+/', '', $camel);

        // The native mb_strtoupper() is only used when no language-specific
        // handling and no length-keeping was requested.
        $nativeUpper = $lang === null && $tryToKeepStringLength === false;

        /**
         * Upper-cases one fragment with the strategy selected above.
         *
         * @param string $fragment
         * @param bool   $clean    forwarded as the "clean UTF-8" flag of self::strtoupper()
         *
         * @return string
         */
        $toUpper = static function (string $fragment, bool $clean) use ($nativeUpper, $encoding, $lang, $tryToKeepStringLength): string {
            if ($nativeUpper !== true) {
                return self::strtoupper($fragment, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($fragment);
            }

            return \mb_strtoupper($fragment, $encoding);
        };

        // Remove each run of dashes / underscores / whitespace and upper-case the character after it (if any).
        $camel = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $toUpper($match[1], false);
            },
            $camel
        );

        // Upper-case the whole match "digits + following character"; the digits themselves are kept.
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $camel
        );
    }
5161
5162
    /**
5163
     * Returns the string with the first letter of each word capitalized,
5164
     * except for when the word is a name which shouldn't be capitalized.
5165
     *
5166
     * @param string $str
5167
     *
5168
     * @return string string with $str capitalized
5169
     */
5170
    public static function str_capitalize_name(string $str): string
    {
        // Collapse whitespace first, then run the helper twice:
        // once with ' ' as delimiter and once, on that result, with '-'.
        $collapsed = self::collapse_whitespace($str);
        $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($bySpace, '-');
    }
5180
5181
    /**
5182
     * Returns true if the string contains $needle, false otherwise. By default
5183
     * the comparison is case-sensitive, but can be made insensitive by setting
5184
     * $caseSensitive to false.
5185
     *
5186
     * @param string $haystack      <p>The input string.</p>
5187
     * @param string $needle        <p>Substring to look for.</p>
5188
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5189
     *
5190
     * @return bool whether or not $haystack contains $needle
5191
     */
5192
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // Case-sensitive: byte-wise strpos(); case-insensitive: multibyte-aware mb_stripos().
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        // Position 0 is a valid hit, so only a strict false means "not found".
        return $position !== false;
    }
5203
5204
    /**
5205
     * Returns true if the string contains all $needles, false otherwise. By
5206
     * default the comparison is case-sensitive, but can be made insensitive by
5207
     * setting $caseSensitive to false.
5208
     *
5209
     * @param string $haystack      <p>The input string.</p>
5210
     * @param array  $needles       <p>SubStrings to look for.</p>
5211
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5212
     *
5213
     * @return bool whether or not $haystack contains all of the $needles
5214
     */
5215
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // Every needle has to be checked: bail out on the first one that is missing
        // and only report success after the whole list has been seen.
        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // An empty needle can never be "contained"; note that "0" is a valid needle.
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5239
5240
    /**
5241
     * Returns true if the string contains any $needles, false otherwise. By
5242
     * default the comparison is case-sensitive, but can be made insensitive by
5243
     * setting $caseSensitive to false.
5244
     *
5245
     * @param string $haystack      <p>The input string.</p>
5246
     * @param array  $needles       <p>SubStrings to look for.</p>
5247
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5248
     *
5249
     * @return bool
5250
     *              Whether or not $haystack contains any of the $needles
5251
     */
5252
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // Look at every needle and stop at the first hit; a miss on one needle
        // must not end the search while other needles are still unchecked.
        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // An empty needle never counts as a hit; note that "0" is a valid needle.
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5276
5277
    /**
5278
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
5279
     * inserted before uppercase characters (with the exception of the first
5280
     * character of the string), and in place of spaces as well as underscores.
5281
     *
5282
     * @param string $str      <p>The input string.</p>
5283
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5284
     *
5285
     * @return string
5286
     */
5287
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // Dasherizing is str_delimit() with '-' as the delimiter.
        $dashed = self::str_delimit($str, '-', $encoding);

        return $dashed;
    }
5291
5292
    /**
5293
     * Returns a lowercase and trimmed string separated by the given delimiter.
5294
     * Delimiters are inserted before uppercase characters (with the exception
5295
     * of the first character of the string), and in place of spaces, dashes,
5296
     * and underscores. Alpha delimiters are not converted to lowercase.
5297
     *
5298
     * @param string      $str                   <p>The input string.</p>
5299
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
5300
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
5301
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
5302
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
5303
     *                                           tr</p>
5304
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
5305
     *                                           ß</p>
5306
     *
5307
     * @return string
5308
     */
5309
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // With native mbstring the mb_ereg_* functions do the regex work, otherwise PCRE in "u" mode.
        $hasMbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // Step 1: put a dash in front of every upper-case letter that is not at a word boundary.
        if ($hasMbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B(\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $trimmed);
        }

        // Step 2: lower-case; the native function is only used for plain UTF-8
        // without language-specific handling or length-keeping.
        $nativeLower = $lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8';
        if ($nativeLower) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // Step 3: collapse every run of dashes / underscores / whitespace into the delimiter.
        if ($hasMbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5343
5344
    /**
5345
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
5346
     *
5347
     * @param string $str <p>The input string.</p>
5348
     *
5349
     * @return false|string
5350
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
5351
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
5352
     */
5353
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary input (UTF-16 / UTF-32 / PDF / images / ...)
        //
        if (self::is_binary($str, true) === true) {
            // is_utf16() / is_utf32(): 1 is reported as little endian, 2 as big endian.
            $utf16Type = self::is_utf16($str, false);
            if ($utf16Type === 1 || $utf16Type === 2) {
                return $utf16Type === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            $utf32Type = self::is_utf32($str, false);
            if ($utf32Type === 1 || $utf32Type === 2) {
                return $utf32Type === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            // binary, but neither UTF-16 nor UTF-32
            return false;
        }

        //
        // 2.) plain ASCII
        //
        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //
        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()" with a fixed candidate order
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
        $candidates = [
            'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
            'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
            'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
            'WINDOWS-1251', 'WINDOWS-1252', 'WINDOWS-1254',
            'CP932', 'CP936', 'CP950', 'CP866', 'CP850', 'CP51932',
            'CP50220', 'CP50221', 'CP50222',
            'ISO-2022-JP', 'ISO-2022-KR',
            'JIS', 'JIS-ms',
            'EUC-CN', 'EUC-JP',
        ];

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()": the first encoding that converts the string onto itself wins
        //
        if (self::$ENCODINGS === null) {
            // load the encoding list on first use
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5465
5466
    /**
5467
     * alias for "UTF8::str_ends_with()"
5468
     *
5469
     * @param string $haystack
5470
     * @param string $needle
5471
     *
5472
     * @return bool
5473
     *
5474
     * @see UTF8::str_ends_with()
5475
     */
5476
    public static function str_ends(string $haystack, string $needle): bool
    {
        // Pure alias: forwards both arguments to str_ends_with().
        $endsWith = self::str_ends_with($haystack, $needle);

        return $endsWith;
    }
5480
5481
    /**
5482
     * Check if the string ends with the given substring.
5483
     *
5484
     * @param string $haystack <p>The string to search in.</p>
5485
     * @param string $needle   <p>The substring to search for.</p>
5486
     *
5487
     * @return bool
5488
     */
5489
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // An empty needle always matches; otherwise a non-empty haystack must
        // end (byte-wise) with exactly the needle.
        return $needle === ''
            || (
                $haystack !== ''
                && \substr($haystack, -\strlen($needle)) === $needle
            );
    }
5501
5502
    /**
5503
     * Returns true if the string ends with any of $substrings, false otherwise.
5504
     *
5505
     * - case-sensitive
5506
     *
5507
     * @param string   $str        <p>The input string.</p>
5508
     * @param string[] $substrings <p>Substrings to look for.</p>
5509
     *
5510
     * @return bool whether or not $str ends with $substring
5511
     */
5512
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified, so no reference variable is left behind.
        foreach ($substrings as $substring) {
            // An empty substring is a suffix of every string (same rule as str_ends_with()).
            // It needs its own check because substr($str, -0) returns the whole string.
            if ($substring === '') {
                return true;
            }

            // Byte-wise, case-sensitive suffix comparison.
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        // Also reached for an empty list of substrings.
        return false;
    }
5526
5527
    /**
5528
     * Ensures that the string begins with $substring. If it doesn't, it's
5529
     * prepended.
5530
     *
5531
     * @param string $str       <p>The input string.</p>
5532
     * @param string $substring <p>The substring to add if not present.</p>
5533
     *
5534
     * @return string
5535
     */
5536
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Byte-wise prefix check; an empty $substring never counts as "already present".
        $alreadyPrefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5548
5549
    /**
5550
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
5551
     *
5552
     * @param string $str       <p>The input string.</p>
5553
     * @param string $substring <p>The substring to add if not present.</p>
5554
     *
5555
     * @return string
5556
     */
5557
    public static function str_ensure_right(string $str, string $substring): string
    {
        // The string is returned untouched only when both parts are non-empty
        // and $str already ends (byte-wise) with $substring.
        $alreadySuffixed = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        if ($alreadySuffixed) {
            return $str;
        }

        return $str . $substring;
    }
5571
5572
    /**
5573
     * Capitalizes the first word of the string, replaces underscores with
5574
     * spaces, and strips '_id'.
5575
     *
5576
     * @param string $str
5577
     *
5578
     * @return string
5579
     */
5580
    public static function str_humanize($str): string
    {
        // First drop every '_id', then turn the remaining underscores into spaces
        // (same order as a single str_replace() call with both pairs).
        $withoutIds = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $withoutIds);

        // Trim, then upper-case the first character via the class's ucfirst().
        return self::ucfirst(\trim($spaced));
    }
5596
5597
    /**
5598
     * alias for "UTF8::str_istarts_with()"
5599
     *
5600
     * @param string $haystack
5601
     * @param string $needle
5602
     *
5603
     * @return bool
5604
     *
5605
     * @see UTF8::str_istarts_with()
5606
     */
5607
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Pure alias: forwards both arguments to str_istarts_with().
        $startsWith = self::str_istarts_with($haystack, $needle);

        return $startsWith;
    }
5611
5612
    /**
5613
     * alias for "UTF8::str_iends_with()"
5614
     *
5615
     * @param string $haystack
5616
     * @param string $needle
5617
     *
5618
     * @return bool
5619
     *
5620
     * @see UTF8::str_iends_with()
5621
     */
5622
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Pure alias: forwards both arguments to str_iends_with().
        $endsWith = self::str_iends_with($haystack, $needle);

        return $endsWith;
    }
5626
5627
    /**
5628
     * Check if the string ends with the given substring, case insensitive.
5629
     *
5630
     * @param string $haystack <p>The string to search in.</p>
5631
     * @param string $needle   <p>The substring to search for.</p>
5632
     *
5633
     * @return bool
5634
     */
5635
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // An empty needle always matches.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle cannot be found in an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // Take as many trailing bytes as the needle is long and compare them
        // case-insensitively with the class's strcasecmp().
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5647
5648
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $substrings list never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference would only
        // force an array copy and leave a dangling reference behind
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5672
5673
    /**
     * Looks up the index of the first occurrence of $needle in $str by
     * delegating to UTF8::stripos(); all arguments are forwarded unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5699
5700
    /**
     * Looks up the index of the last occurrence of $needle in $str by
     * delegating to UTF8::strripos(); all arguments are forwarded unchanged.
     * Offsets may be negative to count from the last character in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5727
5728
    /**
     * Looks up the index of the first occurrence of $needle in $str by
     * delegating to UTF8::strpos(); all arguments are forwarded unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5754
5755
    /**
     * Looks up the index of the last occurrence of $needle in $str by
     * delegating to UTF8::strrpos(); all arguments are forwarded unchanged.
     * Offsets may be negative to count from the last character in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5782
5783
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of $str, $str is returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // fast path: plain "mb_*" calls for the default encoding
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5822
5823
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search string is turned into a quoted, case-insensitive unicode
     * regex and applied via preg_replace(). Replacement strings are inserted
     * literally: "$1", "\1" or "\\" have no special meaning, exactly as with
     * str_ireplace().
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search string never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string, or an array of replacement
     *                       strings matching the search array.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     *               (whatever preg_replace() returns, i.e. null if the regex engine reports an error)
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // an empty needle must never match, so use a pattern that cannot match anything
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // neutralize preg_replace()'s replacement syntax ("$n", "\n", "\\"),
        // otherwise user supplied replacements would silently lose characters
        $escapeReplacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacements = \is_array($replace)
            ? \array_map($escapeReplacement, $replace)
            : $escapeReplacement($replace);

        return \preg_replace($patterns, $replacements, $subject, -1, $count);
    }
5867
5868
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive, including non-ASCII characters (multibyte aware)
     * - if $str does not start with $search, $str is returned unchanged
     * - an empty $search results in $str with $replacement appended
     *   (historic behavior, kept for backward compatibility)
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // covers "empty string + empty search" as well ('' . $replacement)
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        // compare only the leading characters instead of scanning the whole string;
        // "mb_*" is used so that e.g. "Ö" and "ö" are treated as equal
        $searchLength = (int) \mb_strlen($search);
        $prefix = (string) \mb_substr($str, 0, $searchLength);

        if (\mb_stripos($prefix, $search) === 0) {
            return $replacement . \mb_substr($str, $searchLength);
        }

        return $str;
    }
5899
5900
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive, including non-ASCII characters (multibyte aware)
     * - if $str does not end with $search (or is shorter than $search),
     *   $str is returned unchanged
     * - an empty $search results in $str with $replacement appended
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // covers "empty string + empty search" as well ('' . $replacement)
            return $str . $replacement;
        }

        $strLength = (int) \mb_strlen($str);
        $searchLength = (int) \mb_strlen($search);

        // a needle longer than the haystack can never match; bailing out here also
        // avoids the out-of-range offset the byte based stripos() call used to produce
        if ($searchLength > $strLength) {
            return $str;
        }

        // compare only the trailing characters; "mb_*" is used so that
        // e.g. "Ö" and "ö" are treated as equal
        $suffix = (string) \mb_substr($str, -$searchLength);

        if (\mb_stripos($suffix, $search) === 0) {
            return \mb_substr($str, 0, $strLength - $searchLength) . $replacement;
        }

        return $str;
    }
5931
5932
    /**
     * Tells whether $haystack starts with $needle, ignoring case.
     *
     * An empty needle always matches; an empty haystack only matches an
     * empty needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // a match counts only if it is located at the very first position
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
5952
5953
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty $substrings list never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference would only
        // force an array copy and leave a dangling reference behind
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5981
5982
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - the separator is located via UTF8::str_iindex_first()
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset has to be computed in the same encoding that is used to cut the string below
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6016
6017
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - the separator is located via UTF8::strripos()
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset has to be computed in the same encoding that is used to cut the string below
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6051
6052
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - the separator is located via UTF8::str_iindex_first()
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset has to be computed in the same encoding that is used to cut the string below
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6078
6079
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // locate the last occurrence (native "mb_*" call for the default encoding)
        $position = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
6110
6111
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The lookup is delegated to UTF8::stristr(); an empty string is returned
     * if $str or $needle is empty, or if the lookup yields false.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6147
6148
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The lookup is delegated to UTF8::strrichr(); an empty string is returned
     * if $str or $needle is empty, or if the lookup yields false.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6179
6180
    /**
     * Returns the last $n characters of the string.
     *
     * An empty $str or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $n <= 0) {
            return '';
        }

        // a negative start offset counts from the end of the string
        $start = -$n;

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, $start);
        }

        return (string) self::substr(
            $str,
            $start,
            null,
            self::normalize_encoding($encoding, 'UTF-8')
        );
    }
6203
6204
    /**
     * Limit the number of characters in a string.
     *
     * - strings that already fit into $length are returned unchanged
     * - longer strings are cut so that the kept part plus $strAddOn is
     *   $length characters long
     * - if $strAddOn alone is longer than $length, nothing of $str is kept
     *   and only $strAddOn is returned
     * - an empty $str or a non-positive $length yields an empty string
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // never pass a negative length: it would cut from the end of the
            // string instead and return more characters than requested
            $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamp as above for the generic encoding path
        $keep = \max(0, $length - (int) self::strlen($strAddOn));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6241
6242
    /**
     * Limit the number of characters in a string without cutting a word in
     * half: the string is cut at $length and the trailing, possibly
     * incomplete, word is dropped before $strAddOn is appended.
     *
     * - strings that already fit into $length are returned unchanged
     * - if no space is left after the cut, the first ($length - 1) characters
     *   are used instead
     * - an empty $str or a non-positive $length yields an empty string
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the cut ends exactly on a space: drop the space and we are done
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $truncated = \mb_substr($str, 0, $length);

            // everything before the last space, i.e. without the trailing word
            $lastSpace = \strrpos($truncated, ' ');
            $withoutLastWord = $lastSpace === false ? '' : (string) \substr($truncated, 0, $lastSpace);

            if ($withoutLastWord === '') {
                return ((string) \mb_substr($truncated, 0, $length - 1)) . $strAddOn;
            }

            return $withoutLastWord . $strAddOn;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the cut ends exactly on a space: drop the space and we are done
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '' . $strAddOn;
        }

        // everything before the last space, i.e. without the trailing word
        $lastSpace = \strrpos($truncated, ' ');
        $withoutLastWord = $lastSpace === false ? '' : (string) \substr($truncated, 0, $lastSpace);

        if ($withoutLastWord === '') {
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $strAddOn;
        }

        return $withoutLastWord . $strAddOn;
    }
6306
6307
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start until
     * the first difference (or the end of the shorter string) is reached.
     *
     * @param string $str      <p>The input sting.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // only the length of the shorter string needs to be inspected
            $limit = (int) \min(\mb_strlen($str), \mb_strlen($otherStr));

            for ($pos = 0; $pos < $limit; ++$pos) {
                $char = \mb_substr($str, $pos, 1);

                if ($char === false || $char !== \mb_substr($otherStr, $pos, 1)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // only the length of the shorter string needs to be inspected
        $limit = (int) \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = self::substr($str, $pos, 1, $encoding);

            if ($char === false || $char !== self::substr($otherStr, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
6365
6366
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming to solve
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * Both strings are split into characters once up front, and only the
     * previous and the current row of the DP table are kept in memory.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with its $str being the longest common substring
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // extract every character exactly once instead of once per table cell
        $strChars = [];
        for ($i = 0; $i < $strLength; ++$i) {
            $strChars[] = $isUtf8
                ? \mb_substr($str, $i, 1)
                : self::substr($str, $i, 1, $encoding);
        }

        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[] = $isUtf8
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the longest common run found so far
        $end = 0; // (1-based) position in $str at which that run ends

        // $previousRow[$j] = length of the common run ending at $str[$i - 2] / $otherStr[$j - 1]
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $strChars[$i - 1];
            $currentRow = [0 => 0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $run = $previousRow[$j - 1] + 1;
                    $currentRow[$j] = $run;

                    // strictly greater: in the case of ties the first occurrence wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $currentRow[$j] = 0;
                }
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6450
6451
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end until
     * the first difference (or the start of the shorter string) is reached.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // only the length of the shorter string needs to be inspected
            $limit = (int) \min(\mb_strlen($str, $encoding), \mb_strlen($otherStr, $encoding));

            for ($fromEnd = 1; $fromEnd <= $limit; ++$fromEnd) {
                $char = \mb_substr($str, -$fromEnd, 1);

                if ($char === false || $char !== \mb_substr($otherStr, -$fromEnd, 1)) {
                    break;
                }

                // walking backwards, so the new char goes in front
                $suffix = $char . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // only the length of the shorter string needs to be inspected
        $limit = (int) \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        for ($fromEnd = 1; $fromEnd <= $limit; ++$fromEnd) {
            $char = self::substr($str, -$fromEnd, 1, $encoding);

            if ($char === false || $char !== self::substr($otherStr, -$fromEnd, 1, $encoding)) {
                break;
            }

            // walking backwards, so the new char goes in front
            $suffix = $char . $suffix;
        }

        return $suffix;
    }
6512
6513
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (unicode) modifier, so it has to be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on failure
        return \preg_match($regex, $str) === 1;
    }
6525
6526
    /**
     * Tells whether a character exists at the given index. A negative offset
     * counts from the last character in the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // A negative offset of -n needs at least n characters; a non-negative
        // offset must be strictly smaller than the character count.
        return $offset < 0
            ? $charCount >= \abs($offset)
            : $charCount > $offset;
    }
6548
6549
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The length must be measured in the same encoding that is used to
        // fetch the character below, otherwise the bounds check is meaningless.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6578
6579
    /**
     * Pad a string to the given length (counted in characters) with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty or when the input already has more than $pad_length characters.
     * For STR_PAD_BOTH an odd number of padding characters puts the extra
     * character on the right side.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of the string aliases
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Anything that is not already an integer has to be one of the string aliases.
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // The UTF-8 path talks to mbstring directly, every other encoding goes
        // through the class' own strlen()/substr() after normalizing the name.
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $measure = static function (string $value) use ($isUtf8, $encoding): int {
            return $isUtf8
                ? (int) \mb_strlen($value)
                : (int) self::strlen($value, $encoding);
        };

        $take = static function (string $value, int $chars) use ($isUtf8, $encoding): string {
            return $isUtf8
                ? (string) \mb_substr($value, 0, $chars)
                : (string) self::substr($value, 0, $chars, $encoding);
        };

        $str_length = $measure($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        // Number of padding characters that have to be added in total.
        $diff = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            $leftChars = (int) \floor($diff / 2);
            $rightChars = (int) \ceil($diff / 2);

            return $take(\str_repeat($pad_string, $leftChars), $leftChars)
                   . $str
                   . $take(\str_repeat($pad_string, $rightChars), $rightChars);
        }

        // Repeat the pad string often enough to cover $diff characters, then cut to size.
        $repeats = (int) \ceil($diff / $measure($pad_string));
        $fill = $take(\str_repeat($pad_string, $repeats), $diff);

        if ($pad_type === \STR_PAD_LEFT) {
            return $fill . $str;
        }

        // STR_PAD_RIGHT and every unknown integer pad type.
        return $str . $fill;
    }
6741
6742
    /**
     * Pads both sides of the string until it is $length characters long.
     * Shortcut for str_pad() with a pad type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // All the work, including the left/right split, happens in str_pad().
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_BOTH,
            $encoding
        );
    }
6761
6762
    /**
     * Pads the beginning of the string until it is $length characters long.
     * Shortcut for str_pad() with a pad type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // All the work happens in str_pad().
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_LEFT,
            $encoding
        );
    }
6781
6782
    /**
     * Pads the end of the string until it is $length characters long.
     * Shortcut for str_pad() with a pad type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // All the work happens in str_pad().
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_RIGHT,
            $encoding
        );
    }
6801
6802
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6826
6827
    /**
     * Replace all occurrences of the search string with the replacement string.
     *
     * INFO: This is only a thin wrapper around the native "str_replace()",
     * every argument is forwarded unchanged.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6872
6873
    /**
     * Replaces $search with $replacement if, and only if, $str starts with $search.
     *
     * An empty $search never matches a prefix; in that case $replacement is
     * appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // Covers every combination with an empty $search, including an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        // Not a prefix (or $str is empty): nothing to replace.
        if (\strpos($str, $search) !== 0) {
            return $str;
        }

        // Byte-wise functions are sufficient here, the cut happens on the
        // boundary of the matched $search.
        $remainder = \substr($str, \strlen($search));

        return $replacement . $remainder;
    }
6904
6905
    /**
     * Replaces $search with $replacement if, and only if, $str ends with $search.
     *
     * An empty $search never matches a suffix; in that case $replacement is
     * appended to $str. A $search that is longer than $str can never match
     * and leaves $str untouched.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // Covers every combination with an empty $search, including an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Guard against a $search that is longer than $str: looking for it at a
        // negative offset outside of the string is an error in native functions.
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        // Byte-wise comparison of the tail is sufficient, identical UTF-8
        // strings are identical byte sequences.
        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6936
6937
    /**
     * Replace only the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        if ($firstPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $firstPos, $searchLength);
    }
6966
6967
    /**
     * Replace only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $lastPos = self::strrpos($subject, $search);

        if ($lastPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $lastPos, $searchLength);
    }
6998
6999
    /**
     * Returns the characters of the string in random order.
     *
     * PS: relies on "shuffle()", which is not suitable for cryptographic purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $result = '';

        if ($encoding === 'UTF-8') {
            // Shuffle the character positions, then collect the characters in that order.
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
7045
7046
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values of $start and $end are counted
     * from the end of the string.
     *
     * An empty string is returned whenever the resolved $end does not lie
     * behind the resolved $start.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>The result of the underlying substr call is
     *                      passed through, so <b>FALSE</b> is possible if that call fails.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The UTF-8 path talks to mbstring directly, every other encoding goes
        // through the class' own strlen()/substr() after normalizing the name.
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $strLength = $isUtf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // Any length that reaches the end of the string extracts "the rest".
            $length = $strLength;
        } else {
            // Resolve both bounds to absolute indexes first. Computing the length
            // from raw negative values yields wrong or negative lengths, and a
            // negative length means "omit from the end" for the substr functions.
            $startIndex = $start;
            if ($startIndex < 0) {
                $startIndex += $strLength;
                if ($startIndex < 0) {
                    $startIndex = 0;
                }
            }

            $endIndex = $end < 0 ? $strLength + $end : $end;

            if ($endIndex <= $startIndex) {
                return '';
            }

            $length = $endIndex - $startIndex;
        }

        if ($isUtf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7095
7096
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become underscores, every uppercase letter is
     * lower-cased and prefixed with an underscore, every digit is surrounded
     * by underscores. Runs of underscores are collapsed and leading/trailing
     * underscores are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // One digit or one uppercase letter. There is no "|" inside the
            // class: within brackets it would be a literal pipe character.
            '/([\d\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // The match survives an int round-trip: it is a digit.
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/u',        // convert spaces to "_"
                '/^\s+|\s+$/u',  // trim leading & trailing spaces
                '/_+/',         // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7160
7161
    /**
     * Sort all characters of the string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // Flipping twice removes duplicate values.
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        $desc ? \arsort($codePoints) : \asort($codePoints);

        return self::string($codePoints);
    }
7186
7187
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is passed, every element is split on its own and an array
     * with the same keys is returned. With a $length greater than 1 the
     * characters are grouped into chunks of at most $length characters.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array for an empty input or a
     *               $length lower than 1.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as &$v) {
                $v = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }
            // Break the reference so that $v cannot leak into later code.
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // Short input: fetch the characters one by one.
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // Longer input: split with a single regex pass.
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: decode the UTF-8 sequences by hand, based on the bit
            // pattern of the lead byte. Bytes that do not form a complete
            // sequence are silently dropped.

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // The chunk is taken by value: array_map() hands values to its
                // callback, a by-reference parameter would trigger a warning.
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7326
7327
    /**
     * Splits the string with the provided pattern and returns the pieces as
     * an array of strings. An optional integer $limit truncates the result:
     * only the first $limit pieces are returned, the rest is discarded.
     *
     * NOTE(review): with "mbstring" available the pattern is handed to mb_split()
     * as a regular expression, while the fallback runs it through preg_quote()
     * and therefore matches it literally - confirm which one is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() yields false for an invalid pattern; return an empty
            // list (like the fallback below) instead of violating the return type
            if ($parts === false) {
                return [];
            }

            // keep only the leading $limit pieces
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // ask for one piece more than requested, so that the unsplit remainder
        // ends up in a trailing element which can be dropped afterwards
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7388
7389
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle always
     * matches, an empty $haystack only matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // compare only the prefix: strpos() would scan the whole haystack
        // before it can report that the needle is not at position 0
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7409
7410
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference would only
        // leave a dangling alias behind
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7439
    /**
7440
     * Gets the substring after the first occurrence of a separator.
7441
     *
7442
     * @param string $str       <p>The input string.</p>
7443
     * @param string $separator <p>The string separator.</p>
7444
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7445
     *
7446
     * @return string
7447
     */
7448
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
7449
    {
7450 1
        if ($separator === '' || $str === '') {
7451 1
            return '';
7452
        }
7453
7454 1
        if ($encoding === 'UTF-8') {
7455 1
            $offset = \mb_strpos($str, $separator);
7456 1
            if ($offset === false) {
7457 1
                return '';
7458
            }
7459
7460 1
            return (string) \mb_substr(
7461 1
                $str,
7462 1
                $offset + (int) \mb_strlen($separator)
7463
            );
7464
        }
7465
7466
        $offset = self::strpos($str, $separator, 0, $encoding);
7467
        if ($offset === false) {
7468
            return '';
7469
        }
7470
7471
        return (string) \mb_substr(
7472
            $str,
7473
            $offset + (int) self::strlen($separator, $encoding),
7474
            null,
7475
            $encoding
7476
        );
7477
    }
7478
7479
    /**
7480
     * Gets the substring after the last occurrence of a separator.
7481
     *
7482
     * @param string $str       <p>The input string.</p>
7483
     * @param string $separator <p>The string separator.</p>
7484
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7485
     *
7486
     * @return string
7487
     */
7488
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
7489
    {
7490 1
        if ($separator === '' || $str === '') {
7491 1
            return '';
7492
        }
7493
7494 1
        if ($encoding === 'UTF-8') {
7495 1
            $offset = \mb_strrpos($str, $separator);
7496 1
            if ($offset === false) {
7497 1
                return '';
7498
            }
7499
7500 1
            return (string) \mb_substr(
7501 1
                $str,
7502 1
                $offset + (int) \mb_strlen($separator)
7503
            );
7504
        }
7505
7506
        $offset = self::strrpos($str, $separator, 0, $encoding);
7507
        if ($offset === false) {
7508
            return '';
7509
        }
7510
7511
        return (string) self::substr(
7512
            $str,
7513
            $offset + (int) self::strlen($separator, $encoding),
7514
            null,
7515
            $encoding
7516
        );
7517
    }
7518
7519
    /**
7520
     * Gets the substring before the first occurrence of a separator.
7521
     *
7522
     * @param string $str       <p>The input string.</p>
7523
     * @param string $separator <p>The string separator.</p>
7524
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7525
     *
7526
     * @return string
7527
     */
7528
    public static function str_substr_before_first_separator(
7529
        string $str,
7530
        string $separator,
7531
        string $encoding = 'UTF-8'
7532
    ): string {
7533 1
        if ($separator === '' || $str === '') {
7534 1
            return '';
7535
        }
7536
7537 1
        if ($encoding === 'UTF-8') {
7538 1
            $offset = \mb_strpos($str, $separator);
7539 1
            if ($offset === false) {
7540 1
                return '';
7541
            }
7542
7543 1
            return (string) \mb_substr(
7544 1
                $str,
7545 1
                0,
7546 1
                $offset
7547
            );
7548
        }
7549
7550
        $offset = self::strpos($str, $separator, 0, $encoding);
7551
        if ($offset === false) {
7552
            return '';
7553
        }
7554
7555
        return (string) self::substr(
7556
            $str,
7557
            0,
7558
            $offset,
7559
            $encoding
7560
        );
7561
    }
7562
7563
    /**
7564
     * Gets the substring before the last occurrence of a separator.
7565
     *
7566
     * @param string $str       <p>The input string.</p>
7567
     * @param string $separator <p>The string separator.</p>
7568
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7569
     *
7570
     * @return string
7571
     */
7572
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
7573
    {
7574 1
        if ($separator === '' || $str === '') {
7575 1
            return '';
7576
        }
7577
7578 1
        if ($encoding === 'UTF-8') {
7579 1
            $offset = \mb_strrpos($str, $separator);
7580 1
            if ($offset === false) {
7581 1
                return '';
7582
            }
7583
7584 1
            return (string) \mb_substr(
7585 1
                $str,
7586 1
                0,
7587 1
                $offset
7588
            );
7589
        }
7590
7591
        $offset = self::strrpos($str, $separator, 0, $encoding);
7592
        if ($offset === false) {
7593
            return '';
7594
        }
7595
7596
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
7597
7598
        return (string) self::substr(
7599
            $str,
7600
            0,
7601
            $offset,
7602
            $encoding
7603
        );
7604
    }
7605
7606
    /**
7607
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
7608
     *
7609
     * @param string $str          <p>The input string.</p>
7610
     * @param string $needle       <p>The string to look for.</p>
7611
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
7612
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
7613
     *
7614
     * @return string
7615
     */
7616
    public static function str_substr_first(
7617
        string $str,
7618
        string $needle,
7619
        bool $beforeNeedle = false,
7620
        string $encoding = 'UTF-8'
7621
    ): string {
7622 2
        if ($str === '' || $needle === '') {
7623 2
            return '';
7624
        }
7625
7626 2
        if ($encoding === 'UTF-8') {
7627 2
            if ($beforeNeedle === true) {
7628 1
                $part = \mb_strstr(
7629 1
                    $str,
7630 1
                    $needle,
7631 1
                    $beforeNeedle
7632
                );
7633
            } else {
7634 1
                $part = \mb_strstr(
7635 1
                    $str,
7636 2
                    $needle
7637
                );
7638
            }
7639
        } else {
7640
            $part = self::strstr(
7641
                $str,
7642
                $needle,
7643
                $beforeNeedle,
7644
                $encoding
7645
            );
7646
        }
7647
7648 2
        return $part === false ? '' : $part;
7649
    }
7650
7651
    /**
7652
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
7653
     *
7654
     * @param string $str          <p>The input string.</p>
7655
     * @param string $needle       <p>The string to look for.</p>
7656
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
7657
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
7658
     *
7659
     * @return string
7660
     */
7661
    public static function str_substr_last(
7662
        string $str,
7663
        string $needle,
7664
        bool $beforeNeedle = false,
7665
        string $encoding = 'UTF-8'
7666
    ): string {
7667 2
        if ($str === '' || $needle === '') {
7668 2
            return '';
7669
        }
7670
7671 2
        if ($encoding === 'UTF-8') {
7672 2
            if ($beforeNeedle === true) {
7673 1
                $part = \mb_strrchr(
7674 1
                    $str,
7675 1
                    $needle,
7676 1
                    $beforeNeedle
7677
                );
7678
            } else {
7679 1
                $part = \mb_strrchr(
7680 1
                    $str,
7681 2
                    $needle
7682
                );
7683
            }
7684
        } else {
7685
            $part = self::strrchr(
7686
                $str,
7687
                $needle,
7688
                $beforeNeedle,
7689
                $encoding
7690
            );
7691
        }
7692
7693 2
        return $part === false ? '' : $part;
7694
    }
7695
7696
    /**
7697
     * Surrounds $str with the given substring.
7698
     *
7699
     * @param string $str
7700
     * @param string $substring <p>The substring to add to both sides.</P>
7701
     *
7702
     * @return string string with the substring both prepended and appended
7703
     */
7704
    public static function str_surround(string $str, string $substring): string
7705
    {
7706 5
        return $substring . $str . $substring;
7707
    }
7708
7709
    /**
7710
     * Returns a trimmed string with the first letter of each word capitalized.
7711
     * Also accepts an array, $ignore, allowing you to list words not to be
7712
     * capitalized.
7713
     *
7714
     * @param string              $str
7715
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
7716
     *                                                   Default: null</p>
7717
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
7718
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
7719
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
7720
     *                                                   tr</p>
7721
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
7722
     *                                                   ß</p>
7723
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
7724
     *
7725
     * @return string the titleized string
7726
     */
7727
    public static function str_titleize(
7728
        string $str,
7729
        array $ignore = null,
7730
        string $encoding = 'UTF-8',
7731
        bool $cleanUtf8 = false,
7732
        string $lang = null,
7733
        bool $tryToKeepStringLength = false,
7734
        bool $useTrimFirst = true
7735
    ): string {
7736 5
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
7737 4
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
7738
        }
7739
7740 5
        if ($useTrimFirst === true) {
7741 5
            $str = \trim($str);
7742
        }
7743
7744 5
        if ($cleanUtf8 === true) {
7745
            $str = self::clean($str);
7746
        }
7747
7748 5
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;
7749
7750 5
        return (string) \preg_replace_callback(
7751 5
            '/([\S]+)/u',
7752
            static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
7753 5
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
7754 2
                    return $match[0];
7755
                }
7756
7757 5
                if ($useMbFunction === true) {
7758 5
                    if ($encoding === 'UTF-8') {
7759 5
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
7760 5
                               . \mb_strtolower(\mb_substr($match[0], 1));
7761
                    }
7762
7763
                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
7764
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
7765
                }
7766
7767
                return self::ucfirst(
7768
                    self::strtolower(
7769
                        $match[0],
7770
                        $encoding,
7771
                        false,
7772
                        $lang,
7773
                        $tryToKeepStringLength
7774
                    ),
7775
                    $encoding,
7776
                    false,
7777
                    $lang,
7778
                    $tryToKeepStringLength
7779
                );
7780 5
            },
7781 5
            $str
7782
        );
7783
    }
7784
7785
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are merged into the built-in list of "small
     * words" and become part of the regular expressions used below.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case char (e.g. "ALL CAPS") is lower-cased
        // first, otherwise every word would be treated as "has internal caps"
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below contains "…" although its inline comment
        // talks about a hyphen - looks like a mangled character, confirm against upstream.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7954
7955
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big-endian number and written
     * in base 2 without leading zeros, e.g. "a" (0x61) becomes "1100001".
     * An empty string (or a string that consists of NUL bytes only) results
     * in "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        // convert byte by byte: base_convert() on the whole hex dump loses
        // precision as soon as the number no longer fits into a float / int
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same format as base_convert(): no leading zeros, but at least "0"
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7968
7969
    /**
     * Splits a string into its lines.
     *
     * One or two consecutive "\r" / "\n" characters are treated as a single
     * line break, so "\r\n" counts once - and so do two consecutive "\n".
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "nothing to split": an empty list or a list with one empty line
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            $lines = \preg_split("/[\r\n]{1,2}/u", $str);
        } else {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $lines = \mb_split('[\r\n]{1,2}', $str);
        }

        if ($lines === false) {
            return $emptyResult;
        }

        $needsFiltering = $removeEmptyValues !== false || $removeShortValues !== null;
        if ($needsFiltering) {
            return self::reduce_string_array(
                $lines,
                $removeEmptyValues,
                $removeShortValues
            );
        }

        return $lines;
    }
8007
8008
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result holds
     * the words as well as the text between them, in their original order.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        $charList = self::rxClass($charList, '\pL');

        $return = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        $tmpReturn = self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );

        // cast every entry to string; array_map() keeps the keys and avoids the
        // dangling reference a by-reference foreach would leave behind
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $tmpReturn
        );
    }
8055
8056
    /**
     * Alias for "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8071
8072
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If there is no room left for any char of $str (e.g. $substring is at least
     * as long as $length), only $substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // a negative length would make mb_substr() cut chars from the *end*
            // of the string, so the result would be much longer than requested
            $length = \max(0, $length);

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // see above: never pass a negative length to substr()
        $length = \max(0, $length);

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
8129
8130
    /**
     * Truncates a string to at most $length characters without cutting a word
     * in the middle. When truncation happens, room is reserved for $substring,
     * which is then appended to the result.
     *
     * Returns $substring alone when $str is empty, when $length is not positive,
     * or when $substring itself leaves no room for any part of $str. Returns
     * $str unchanged when it already fits into $length.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Only relevant when the cut falls inside the
     *                                                very first word (no space before the cut) while a space
     *                                                exists later in the string: false drops that partial
     *                                                word, true keeps it. Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            // fast path: call mbstring directly
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            $cut = \mb_substr($str, 0, $length);
            if ($cut === false) {
                return '';
            }

            // a space exactly at the cut position means no word was split
            $spacePos = \mb_strpos($str, ' ', $length - 1);
            if ($spacePos === $length) {
                return $cut . $substring;
            }

            // otherwise fall back to the last space inside the cut part
            $lastSpace = \mb_strrpos($cut, ' ', 0);
            $cutAtSpace = $lastSpace !== false
                          ||
                          ($spacePos !== false && $ignoreDoNotSplitWordsForOneWord === false);
            if ($cutAtSpace) {
                $cut = (string) \mb_substr($cut, 0, (int) $lastSpace);
            }

            return $cut . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $cut = self::substr($str, 0, $length, $encoding);
        if ($cut === false) {
            return '';
        }

        // a space exactly at the cut position means no word was split
        $spacePos = self::strpos($str, ' ', $length - 1, $encoding);
        if ($spacePos === $length) {
            return $cut . $substring;
        }

        // otherwise fall back to the last space inside the cut part
        $lastSpace = self::strrpos($cut, ' ', 0, $encoding);
        $cutAtSpace = $lastSpace !== false
                      ||
                      ($spacePos !== false && $ignoreDoNotSplitWordsForOneWord === false);
        if ($cutAtSpace) {
            $cut = (string) self::substr($cut, 0, (int) $lastSpace, $encoding);
        }

        return $cut . $substring;
    }
8224
8225
    /**
     * Converts a string into its underscore-delimited form by delegating to
     * UTF8::str_delimit() with "_" as the delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     *
     * @see UTF8::str_delimit()
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8239
8240
    /**
     * Returns an UpperCamelCase version of the supplied string: the input is
     * first passed through UTF8::str_camelize() and the result is then passed
     * through UTF8::ucfirst().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
8262
8263
    /**
     * Alias for "UTF8::ucfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
8285
8286
    /**
     * Counts the words in a UTF-8 string, or returns the words themselves.
     *
     * The string is split via UTF8::str_to_words(); this method reads the words
     * from the odd indexes of that result and treats the even indexes as the
     * text between words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string, or the words for format 1 and 2
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        if ($format === 1) {
            // plain list of words
            $words = [];
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        if ($format === 2) {
            // words keyed by their character offset in the input
            $words = [];
            $position = (int) self::strlen($parts[0]);
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[$position] = $parts[$index];
                $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
            }

            return $words;
        }

        return (int) (($partCount - 1) / 2);
    }
8323
8324
    /**
     * Case-insensitive string comparison.
     *
     * Both strings are case-folded with UTF8::strtocasefold() and the folded
     * results are compared with UTF8::strcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8345
8346
    /**
     * Alias for "UTF8::strstr()": all arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8368
8369
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings compare as equal right away. Otherwise both strings are
     * normalized to NFD and compared byte-wise, so differently composed forms
     * of the same text compare as equal.
     *
     * If a string cannot be normalized (\Normalizer::normalize() returns false,
     * e.g. for invalid UTF-8), the raw string is compared instead of passing
     * "false" on to \strcmp(), which would raise a TypeError in strict mode.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
8387
8388
    /**
     * Find length of initial segment not matching mask.
     *
     * Returns the number of characters at the start of $str (or of the slice
     * selected by $offset / $length) that come before the first character
     * contained in $charList. If no listed character occurs, or $charList is
     * empty, the length of the inspected string is returned.
     *
     * @param string   $str
     * @param string   $charList
     * @param int|null $offset   [optional] <p>Start of the slice to inspect.</p>
     * @param int|null $length   [optional] <p>Length of the slice to inspect.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        $needsSlice = !($offset === null && $length === null);
        if ($needsSlice) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length !== null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset, $length);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the list
        $found = [];
        if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $found)) {
            $prefixLength = self::strlen($found[1], $encoding);

            return $prefixLength === false ? 0 : $prefixLength;
        }

        return (int) self::strlen($str, $encoding);
    }
8451
8452
    /**
     * Alias for "UTF8::stristr()": all arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8474
8475
    /**
     * Create a UTF-8 string from code points.
     *
     * Every element of $array is converted with UTF8::chr() and the resulting
     * characters are concatenated in array order.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $toChar = [
            self::class,
            'chr',
        ];
        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
8497
8498
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is compared against every BOM byte sequence that is a key of
     * self::$BOM. Only the keys are needed, so they are iterated by value; a
     * by-reference loop would turn the elements of the static array into
     * references.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8518
8519
    /**
     * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8
     * first.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped. When null, the native
     *                                    function is called without this argument.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8555
8556
    /**
     * Strip all whitespace characters from a string by removing every run
     * matched by the "[[:space:]]" class in UTF-8 mode, e.g. spaces, tabs and
     * newline characters.
     *
     * @param string $str
     *
     * @return string the input without whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8573
8574
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * Strategy, in order: mbstring if available; otherwise intl's grapheme
     * function (UTF-8 and non-negative offset only); otherwise the native
     * function for pure ASCII input; otherwise case-folding both strings and
     * delegating to UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found or one of the
     *                   strings is empty
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift the
            // position reported by "mb_strpos()" and "iconv_strpos()"
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // "grapheme_stripos()" handles neither other encodings nor negative offsets
        $canUseGrapheme = $encoding === 'UTF-8'
                          &&
                          $offset >= 0
                          &&
                          self::$SUPPORT['intl'] === true;
        if ($canUseGrapheme) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8649
8650
    /**
     * Returns all of haystack starting from and including the first
     * case-insensitive occurrence of needle to the end.
     *
     * Strategy, in order: mbstring if available; otherwise intl's grapheme
     * function (UTF-8 only); otherwise the native function for pure ASCII
     * input; otherwise a case-insensitive regular expression.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found or one of the
     *                      given strings is empty
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare against '' explicitly:
        // a loose "!$needle" check would also treat the valid needle "0" as empty.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // fallback via vanilla php: lazily capture everything in front of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix to get the part starting at the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8733
8734
    /**
     * Get the string length, not the byte-length!
     *
     * Strategy, in order: mbstring; byte length for "CP850" / "ASCII"; iconv;
     * intl's grapheme function (UTF-8 only); byte length for pure ASCII input;
     * and finally counting the matches of a UTF-8 regular expression.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters make "mb_strlen" and "\iconv_strlen"
            // report a wrong length
            $str = self::clean($str);
        }

        // preferred: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        // fallback for binary || ascii only
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        $cannotHandleEncoding = $encoding !== 'UTF-8'
                                &&
                                self::$SUPPORT['mbstring'] === false
                                &&
                                self::$SUPPORT['iconv'] === false;
        if ($cannotHandleEncoding) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // fallback via iconv
        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        // fallback via intl ("grapheme_strlen()" can't handle other encodings)
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        // fallback via vanilla php: one regex match per character
        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        return $charCount === 0 ? false : $charCount;
    }
8848
8849
    /**
     * Get string length in byte.
     *
     * @param string $str
     *
     * @return int number of bytes in $str
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // With function overloading active, "mb_" is available, so measure
        // via mbstring using the 8-bit encoding "CP850".
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8869
8870
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * Both strings are case-folded with UTF8::strtocasefold() and the folded
     * results are compared with UTF8::strnatcmp().
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8891
8892
    /**
     * String comparisons using a "natural order" algorithm
     *
     * Identical strings compare as equal right away; otherwise both strings
     * are passed through UTF8::strtonatfold() before the native comparison.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        return \strnatcmp(
            (string) self::strtonatfold($str1),
            (string) self::strtonatfold($str2)
        );
    }
8911
8912
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded with UTF8::strtocasefold() and the folded
     * results are compared with UTF8::strncmp().
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8939
8940
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut to their first $len characters and the prefixes are
     * compared with UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding === 'UTF-8') {
            // fast path: call mbstring directly
            return self::strcmp(
                (string) \mb_substr($str1, 0, $len),
                (string) \mb_substr($str2, 0, $len)
            );
        }

        $prefix1 = (string) self::substr($str1, 0, $len, $encoding);
        $prefix2 = (string) self::substr($str2, 0, $len, $encoding);

        return self::strcmp($prefix1, $prefix2);
    }
8975
8976
    /**
     * Search a string for any of a set of characters.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     *                      or one of the arguments is empty
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // capture the byte offset of the first listed character
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $found, \PREG_OFFSET_CAPTURE)) {
            return false;
        }

        return \substr($haystack, (int) $found[0][1]);
    }
8998
8999
    /**
9000
     * Find position of first occurrence of string in a string.
9001
     *
9002
     * @see http://php.net/manual/en/function.mb-strpos.php
9003
     *
9004
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
9005
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
9006
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9007
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9008
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9009
     *
9010
     * @return false|int
9011
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9012
     *                   string.<br> If needle is not found it returns false.
9013
     */
9014
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Convert it to the
        // absolute character index where the search starts, so that the position
        // returned below is relative to the beginning of the original haystack
        // (previously it was relative to the truncated haystack).
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // Byte-wise search inside the remaining haystack ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... then convert the byte position back into a character position.
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9146
9147
    /**
9148
     * Find position of first occurrence of string in a string.
9149
     *
9150
     * @param string $haystack <p>
9151
     *                         The string being checked.
9152
     *                         </p>
9153
     * @param string $needle   <p>
9154
     *                         The string to search for in haystack.
9155
     *                         </p>
9156
     * @param int    $offset   [optional] <p>
9157
     *                         The search offset. If it is not specified, 0 is used.
9158
     *                         </p>
9159
     *
9160
     * @return false|int The numeric position of the first occurrence of needle in the
9161
     *                   haystack string. If needle is not found, it returns false.
9162
     */
9163
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never yields a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Without mbstring function overloading the plain strpos() call is used.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding.
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
9176
9177
    /**
9178
     * Finds the last occurrence of a character in a string within another.
9179
     *
9180
     * @see http://php.net/manual/en/function.mb-strrchr.php
9181
     *
9182
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
9183
     * @param string $needle        <p>The string to find in haystack</p>
9184
     * @param bool   $before_needle [optional] <p>
9185
     *                              Determines which portion of haystack
9186
     *                              this function returns.
9187
     *                              If set to true, it returns all of haystack
9188
     *                              from the beginning to the last occurrence of needle.
9189
     *                              If set to false, it returns all of haystack
9190
     *                              from the last occurrence of needle to the end,
9191
     *                              </p>
9192
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9193
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9194
     *
9195
     * @return false|string the portion of haystack or false if needle is not found
9196
     */
9197
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Preferred path: mbstring.
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // Binary / ASCII input: the native function is used, but only when the
        // part after the needle is requested.
        $isByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
        if ($isByteEncoding && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Both remaining fallbacks (iconv and vanilla php) search for the first
        // character of the needle only.
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        // Locate the last occurrence via iconv when available, else via our own strrpos().
        $pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9299
9300
    /**
9301
     * Reverses characters order in the string.
9302
     *
9303
     * @param string $str      <p>The input string.</p>
9304
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9305
     *
9306
     * @return string the string with characters in the reverse sequence
9307
     */
9308
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Emoji are encoded before the reversal and decoded again on the result.
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Walk the string from the last character to the first.
            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $cluster = \grapheme_substr($str, $index, 1);
                if ($cluster !== false) {
                    $reversed .= $cluster;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
9352
9353
    /**
9354
     * Finds the last occurrence of a character in a string within another, case insensitive.
9355
     *
9356
     * @see http://php.net/manual/en/function.mb-strrichr.php
9357
     *
9358
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
9359
     * @param string $needle        <p>The string to find in haystack.</p>
9360
     * @param bool   $before_needle [optional] <p>
9361
     *                              Determines which portion of haystack
9362
     *                              this function returns.
9363
     *                              If set to true, it returns all of haystack
9364
     *                              from the beginning to the last occurrence of needle.
9365
     *                              If set to false, it returns all of haystack
9366
     *                              from the last occurrence of needle to the end,
9367
     *                              </p>
9368
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9369
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9370
     *
9371
     * @return false|string the portion of haystack or<br>false if needle is not found
9372
     */
9373
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Preferred path: mbstring.
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // Vanilla php fallback: only the first character of the needle is searched for.
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9428
9429
    /**
9430
     * Find position of last occurrence of a case-insensitive string.
9431
     *
9432
     * @param string     $haystack  <p>The string to look in.</p>
9433
     * @param int|string $needle    <p>The string to look for.</p>
9434
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
9435
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9436
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9437
     *
9438
     * @return false|int
9439
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
9440
     *                   string.<br>If needle is not found, it returns false.
9441
     */
9442
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle,
        // so a non-negative integer is converted via chr() first
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Preferred path: mbstring.
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        // Binary / ASCII input is handled by the native function.
        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // intl path: "grapheme_strripos()" can't handle other encodings
        // and can't handle a negative offset.
        $intlUsable = $encoding === 'UTF-8'
                      && $offset >= 0
                      && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        // Pure ASCII data is handled by the native function as well.
        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        // Vanilla php fallback: case-fold both strings, then search case-sensitively.
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
    }
9539
9540
    /**
9541
     * Finds position of last occurrence of a string within another, case insensitive.
9542
     *
9543
     * @param string $haystack <p>
9544
     *                         The string from which to get the position of the last occurrence
9545
     *                         of needle.
9546
     *                         </p>
9547
     * @param string $needle   <p>
9548
     *                         The string to find in haystack.
9549
     *                         </p>
9550
     * @param int    $offset   [optional] <p>
9551
     *                         The position in haystack
9552
     *                         to start searching.
9553
     *                         </p>
9554
     *
9555
     * @return false|int return the numeric position of the last occurrence of needle in the
9556
     *                   haystack string, or false if needle is not found
9557
     */
9558
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never yields a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Without mbstring function overloading the plain strripos() call is used.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding.
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
9571
9572
    /**
9573
     * Find position of last occurrence of a string in a string.
9574
     *
9575
     * @see http://php.net/manual/en/function.mb-strrpos.php
9576
     *
9577
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
9578
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
9579
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
9580
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
9581
     *                              the end of the string.
9582
     *                              </p>
9583
     * @param string     $encoding  [optional] <p>Set the charset.</p>
9584
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9585
     *
9586
     * @return false|int
9587
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
9588
     *                   string.<br>If needle is not found, it returns false.
9589
     */
9590
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Restrict the haystack according to $offset. The requested encoding is
        // forwarded to substr() / strlen() (it was previously dropped here), so
        // slicing and position counting use the same encoding as the rest of
        // this method and as strpos().
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last abs($offset) characters
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // Byte-wise search inside the remaining haystack ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // ... then convert the byte position back into a character position.
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9709
9710
    /**
9711
     * Find position of last occurrence of a string in a string.
9712
     *
9713
     * @param string $haystack <p>
9714
     *                         The string being checked, for the last occurrence
9715
     *                         of needle.
9716
     *                         </p>
9717
     * @param string $needle   <p>
9718
     *                         The string to find in haystack.
9719
     *                         </p>
9720
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
9721
     *                         the string. Negative values will stop searching at an arbitrary point
9722
     *                         prior to the end of the string.
9723
     *
9724
     * @return false|int The numeric position of the last occurrence of needle in the
9725
     *                   haystack string. If needle is not found, it returns false.
9726
     */
9727
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never yields a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Without mbstring function overloading the plain strrpos() call is used.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding.
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
9740
9741
    /**
9742
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
9743
     * mask.
9744
     *
9745
     * @param string $str      <p>The input string.</p>
9746
     * @param string $mask     <p>The mask of chars</p>
9747
     * @param int    $offset   [optional]
9748
     * @param int    $length   [optional]
9749
     * @param string $encoding [optional] <p>Set the charset.</p>
9750
     *
9751
     * @return false|int
9752
     */
9753
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Narrow the input down to the requested window first (if any).
        $needsSlice = ($offset || $length !== null);
        if ($needsSlice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the leading run of characters that all belong to the mask.
        $matches = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            return 0;
        }

        // The result is the length of that run, in characters.
        return (int) self::strlen($matches[0], $encoding);
    }
9784
9785
    /**
     * Returns the part of the haystack starting at the first occurrence of the needle,
     * or (with $before_needle) the part in front of that occurrence.
     *
     * Strategy, in order: mbstring, byte-wise "\strstr()" for CP850 / ASCII encodings,
     * intl ("grapheme_strstr()", UTF-8 only), byte-wise "\strstr()" if haystack and needle
     * are pure ASCII, and finally a regular-expression based search.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first
     *                              occurrence of the needle (excluding the needle) is returned.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also <strong>false</strong> if haystack or needle is empty)
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only encodings: byte-wise search
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // from here on only UTF-8 can be handled, warn about everything else
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl (INFO: "grapheme_strstr()" can't handle other encodings)
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($viaIntl !== false) {
                return $viaIntl;
            }
        }

        //
        // 4) ascii only input: byte-wise search
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5) vanilla php: lazily capture everything in front of the first needle
        //

        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $beforeNeedle = $match[1];

        return $before_needle
            ? $beforeNeedle
            : self::substr($haystack, (int) self::strlen($beforeNeedle));
    }
9893
9894
    /**
     * Finds the first occurrence of a string within another (byte-wise).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack is returned.
     *                              If set to true, all of haystack from the beginning
     *                              to the first occurrence of needle is returned.
     *                              If set to false, all of haystack from the first
     *                              occurrence of needle to the end is returned.
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found (or if haystack / needle is empty)
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset;
        // otherwise the native function is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
            : \strstr($haystack, $needle, $before_needle);
    }
9929
9930
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through "UTF8::fixStrCaseHelper()" and afterwards
     * converted to lowercase or uppercase, depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        $folded = self::fixStrCaseHelper($str, $lower, $full);
        $toLower = $lower === true;

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
        }

        // otherwise delegate to the encoding / language aware helpers
        return $toLower
            ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
    }
9982
9983
    /**
     * Make a string lowercase.
     *
     * Without a language, "mb_strtolower()" is used. With a language, the intl
     * transliterator "<lang>-Lower" is used ("Any-Lower" plus a warning if that
     * transliterator is unknown); without intl a warning is raised and
     * "mb_strtolower()" is used instead.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language given: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // language given, but no intl: warn and fallback via symfony polyfill
        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of known transliterators
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
10052
10053
    /**
     * Make a string uppercase.
     *
     * Without a language, "mb_strtoupper()" is used. With a language, the intl
     * transliterator "<lang>-Upper" is used ("Any-Upper" plus a warning if that
     * transliterator is unknown); without intl a warning is raised and
     * "mb_strtoupper()" is used instead.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of known transliterators
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the language specific transliterator is missing
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10122
10123
    /**
     * Translate characters or replace sub-strings.
     *
     * Behaviour, depending on the arguments:
     * - $to is not '': $from and $to are split via "UTF8::str_split()", the longer list is
     *   truncated to the size of the shorter one and each "from" element is replaced by the
     *   "to" element at the same position (via "\strtr()" with a replace-pairs array).
     * - $to is '' and $from is a string: every occurrence of $from is removed from $str.
     * - $to is '' and $from is not a string: $from is passed to "\strtr()" as replace-pairs.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the "from" and "to" lists cannot be combined
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromList = self::str_split($from);
            $toList = self::str_split($to);
            $countFrom = \count($fromList);
            $countTo = \count($toList);

            // both lists must have the same size, so cut the longer one
            if ($countFrom > $countTo) {
                $fromList = \array_slice($fromList, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toList = \array_slice($toList, 0, $countFrom);
            }

            // keep the lists in their own variables, so that the error message
            // below can still show the "from" list when combining fails
            $pairs = \array_combine($fromList, $toList);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromList, true) . ' | to: ' . \print_r($toList, true) . ')');
            }

            $from = $pairs;
        }

        // only reached with a string if $to is '': remove the sub-string
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10170
10171
    /**
     * Return the width of a string.
     *
     * With mbstring the result of "mb_strwidth()" is returned. Otherwise the string is
     * converted to UTF-8 (if needed), every character from the hard-coded code point
     * ranges below counts as width 2 and every remaining character as width 1.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the double-width characters and remember how many there were
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace(
            '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u',
            '',
            $str,
            -1,
            $wideCount
        );

        // wide characters count twice, the rest once
        $narrowCount = (int) self::strlen($narrowOnly, 'UTF-8');

        return ($wideCount << 1) + $narrowCount;
    }
10221
10222
    /**
     * Get part of a string.
     *
     * Strategy, in order: mbstring, byte-wise "\substr()" for CP850 / ASCII encodings,
     * intl ("grapheme_substr()", UTF-8 and non-negative offset only), iconv (non-negative
     * length only), byte-wise "\substr()" for pure ASCII input and finally a split of the
     * string into an array of characters.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str       <p>The string being checked.</p>
     * @param int    $offset    <p>The first position used in str.</p>
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned if <i>str</i>
     *                      is empty or <i>length</i> is 0. Without mbstring, <b>FALSE</b> is
     *                      returned if the length of <i>str</i> cannot be determined.
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // Hand the encoding to mbstring directly: calling self::substr() again with
            // the very same arguments would end up in this branch again (endless recursion).
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string: the offset points exactly behind the last character
        // ($length can only be null or a non-zero integer at this point)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        } else {
            $length = (int) $length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10382
10383
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * If an offset or a length is given, $str1 is cut to that part first and $str2 is cut
     * to the length of the remaining $str1; afterwards both strings are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        $hasRange = $offset !== 0 || $length !== null;

        if ($hasRange && $encoding === 'UTF-8') {
            // cut the main string to the requested range ...
            $str1 = $length === null
                ? (string) \mb_substr($str1, $offset)
                : (string) \mb_substr($str1, $offset, $length);

            // ... and the secondary string to the same number of characters
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($hasRange) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
10436
10437
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The substring to search for.</p>
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length after the specified offset to search for the
     *                          substring. A length of 0 always results in 0 matches.
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int the number of occurrences, or false if haystack or needle is empty
     *                   (or if the length of the haystack cannot be determined)
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // restrict the haystack to the requested range first
        if ($offset || $length > 0) {
            $sliceLength = $length;
            if ($sliceLength === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }
                $sliceLength = (int) $haystackLength;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $sliceLength)
                : (string) \mb_substr($haystack, $offset, $sliceLength, $encoding);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // vanilla php: count the matches of the quoted needle
        $pattern = '/' . \preg_quote($needle, '/') . '/us';
        \preg_match_all($pattern, $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10518
10519
    /**
     * Count the number of substring occurrences (byte-wise).
     *
     * Without "mbstring.func_overload" this is a thin wrapper around "\substr_count()".
     * With the overload active, the haystack is cut to the requested range first and
     * the matches are counted via "mb_substr_count()" with an 8-BIT charset.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring. It outputs a warning if the offset plus the length is
     *                         greater than the haystack length.
     *                         </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string (0 if haystack or needle is empty)
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // no overload: the native function already works on bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            // cut the haystack to the requested range, a failed cut counts as empty string
            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10596
10597
    /**
     * Counts how often $substring occurs in $str.
     *
     * The comparison is case-sensitive unless $caseSensitive is set to false; in that
     * case both strings are case-converted before counting.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int number of occurrences, 0 if one of the strings is empty
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // the fast path is taken only for the literal 'UTF-8', decided before normalization
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!$caseSensitive) {
            if ($isUtf8) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            } else {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }
        }

        if ($isUtf8) {
            return (int) \mb_substr_count($str, $substring);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10643
10644
    /**
     * Strips the prefix $needle from the start of $haystack, ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string $haystack without the prefix, or $haystack unchanged if it does not start with $needle
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack yields '', an empty needle leaves the haystack untouched
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10668
10669
    /**
     * Get part of a string, processed in bytes (not characters).
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first byte position used in str.</p>
     * @param int    $length [optional] <p>The maximum length (in bytes) of the returned string,
     *                       null means "up to the end of the string".</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by the underlying
     *                      "\substr()" / "\mb_substr()" call (which may be <b>FALSE</b>, e.g.
     *                      if <i>str</i> is shorter than <i>offset</i> on older PHP versions).
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no explicit length: omit the argument instead of passing a magic upper bound,
        // so that the result is never capped at 2147483647 bytes
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10700
10701
    /**
     * Strips the suffix $needle from the end of $haystack, ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string $haystack without the suffix, or $haystack unchanged if it does not end with $needle
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack yields '', an empty needle leaves the haystack untouched
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // number of leading characters that survive once the suffix is cut off
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10725
10726
    /**
     * Strips the prefix $needle from the start of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string $haystack without the prefix, or $haystack unchanged if it does not start with $needle
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack yields '', an empty needle leaves the haystack untouched
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10750
10751
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given (null) and $str is a string, the
     *                                     replacing ends at the end of string. If length is zero then this
     *                                     function will have the effect of inserting replacement into string at
     *                                     the given start offset.<br><br>
     *                                     If $str is an array and $length is null, a length of 0 is used for
     *                                     every element (i.e. the replacement is inserted).</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: non-integer entries fall back to $num
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call per element; the closure forwards $encoding, which a plain
            // callable passed to "array_map()" would silently drop (=> always 'UTF-8')
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only the first entry is used
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing / oversized length into a concrete character count
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into arrays of single (UTF-8) characters
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10901
10902
    /**
     * Strips the suffix $needle from the end of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string $haystack without the suffix, or $haystack unchanged if it does not end with $needle
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack yields '', an empty needle leaves the haystack untouched
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise suffix comparison
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$endsWithNeedle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10943
10944
    /**
     * Returns a case swapped version of the string.
     *
     * The result is built by XOR-ing the lower-cased string, the upper-cased string and
     * the input itself (byte-wise string XOR).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // bytes that are equal in $lower and $upper cancel out and leave the input byte,
        // bytes that differ flip the input to the respective other case
        return (string) ($lower ^ $upper ^ $str);
    }
10971
10972
    /**
     * Checks whether symfony-polyfills are used.
     *
     * Detected as: the "mbstring" (or "iconv") extension is not loaded, but the
     * function "mb_strlen" (or "iconv") exists anyway.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10995
10996
    /**
     * Replaces every tab character in the string with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces used for one tab. Default: 4</p>
     *
     * @return string the string with all tabs replaced
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
11014
11015
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without $lang and without $tryToKeepStringLength the work is done by
     * "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation are requested
        if ($lang !== null || $tryToKeepStringLength !== false) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
11052
11053
    /**
     * Deprecated camelCase alias, forwards all arguments to "UTF8::to_ascii()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr <p>Passed on as the "unknown character" replacement.</p>
     * @param bool   $strict    <p>Passed on as the "strict" flag.</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11069
11070
    /**
     * Deprecated camelCase alias, forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11084
11085
    /**
     * Deprecated camelCase alias, forwards to "UTF8::to_latin1()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_latin1()
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11099
11100
    /**
     * Deprecated camelCase alias, forwards to "UTF8::to_utf8()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11114
11115
    /**
     * Convert a string into ASCII.
     *
     * Pure ASCII input is returned unchanged. Otherwise the string is cleaned via
     * "UTF8::clean()", optionally transliterated via PHP-Intl, and every remaining
     * non-ASCII character is decoded to its code point and replaced through lookup
     * tables loaded per 256-code-point "bank".
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache of the lookup tables, indexed by bank (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            // reset per character, so that a character which matches none of the decode
            // branches below can never inherit the code point of its predecessor
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // 2-byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3-byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4-byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5-byte sequence
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6-byte sequence
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // lead bytes 254 / 255 are never decoded
            if ($ordC0 === 254 || $ordC0 === 255) {
                $c = $unknown;

                continue;
            }

            // no branch produced a code point
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // load the lookup table of this bank once (a missing table is cached as an empty array)
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            // position of the character within its bank
            $newchar = $ord & 255;

            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {
                // no replacement known for this character
                // (for debugging: dump sprintf('x%02x', $bank), $c, $ord and $newchar here)
                $c = $unknown;
            }
        }
        unset($c);

        return \implode('', $chars);
    }
11275
11276
    /**
     * Interprets a value as boolean.
     *
     * The value is cast to string first. 'true', '1', 'on', 'yes' map to true and
     * 'false', '0', 'off', 'no' map to false (case-insensitive). Other numeric strings
     * are true if they are greater than zero, everything else is true if it is not
     * empty after trimming.
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $normalized = \strtolower($str);

        if (\in_array($normalized, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($normalized, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        if (\is_numeric($str)) {
            return ((float) $str + 0) > 0;
        }

        return (bool) \trim($str);
    }
11317
11318
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Steps: (optionally) transliterate, remove every character that is not a letter
     * (a-z, A-Z), a digit, ".", "-", whitespace or the fallback char, convert whitespace
     * runs to the fallback char, collapse repeated fallback chars and finally trim the
     * fallback char from both ends.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe
     *                                  characters are simply removed.</p>
     * @param string $fallback_char     <p>Replacement for whitespace. May be an empty string, in that case
     *                                  whitespace is removed.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/u',                                           // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // an empty fallback char would produce the invalid pattern "/[]+/u", which makes
        // "preg_replace()" fail for the whole call, so step 3 only runs if there is a char to collapse
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/u'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        if ($fallback_char === '') {
            return $string;
        }

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11353
11354
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), keys are kept.
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
11378
11379
    /**
     * Alias, forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11392
11393
    /**
11394
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
11395
     *
11396
     * <ul>
11397
     * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
11398
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
11399
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
11400
     * case.</li>
11401
     * </ul>
11402
     *
11403
     * @param string|string[] $str                    <p>Any string or array.</p>
11404
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
11405
     *
11406
     * @return string|string[] the UTF-8 encoded string
11407
     */
11408
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
11409
    {
11410 41
        if (\is_array($str) === true) {
11411 4
            foreach ($str as $k => &$v) {
11412 4
                $v = self::to_utf8($v, $decodeHtmlEntityToUtf8);
11413
            }
11414
11415 4
            return $str;
11416
        }
11417
11418 41
        $str = (string) $str;
11419 41
        if ($str === '') {
11420 6
            return $str;
11421
        }
11422
11423 41
        $max = \strlen($str);
11424 41
        $buf = '';
11425
11426 41
        for ($i = 0; $i < $max; ++$i) {
11427 41
            $c1 = $str[$i];
11428
11429 41
            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
11430
11431 37
                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8
11432
11433 34
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11434
11435 34
                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
11436 20
                        $buf .= $c1 . $c2;
11437 20
                        ++$i;
11438
                    } else { // not valid UTF8 - convert it
11439 34
                        $buf .= self::to_utf8_convert_helper($c1);
11440
                    }
11441 34
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8
11442
11443 33
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11444 33
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11445
11446 33
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
11447 15
                        $buf .= $c1 . $c2 . $c3;
11448 15
                        $i += 2;
11449
                    } else { // not valid UTF8 - convert it
11450 33
                        $buf .= self::to_utf8_convert_helper($c1);
11451
                    }
11452 26
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8
11453
11454 26
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11455 26
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11456 26
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
11457
11458 26
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
11459 8
                        $buf .= $c1 . $c2 . $c3 . $c4;
11460 8
                        $i += 3;
11461
                    } else { // not valid UTF8 - convert it
11462 26
                        $buf .= self::to_utf8_convert_helper($c1);
11463
                    }
11464
                } else { // doesn't look like UTF8, but should be converted
11465
11466 37
                    $buf .= self::to_utf8_convert_helper($c1);
11467
                }
11468 38
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion
11469
11470 4
                $buf .= self::to_utf8_convert_helper($c1);
11471
            } else { // it doesn't need conversion
11472
11473 38
                $buf .= $c1;
11474
            }
11475
        }
11476
11477
        // decode unicode escape sequences + unicode surrogate pairs
11478 41
        $buf = \preg_replace_callback(
11479 41
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
11480
            /**
11481
             * @param array $matches
11482
             *
11483
             * @return string
11484
             */
11485
            static function (array $matches): string {
11486 12
                if (isset($matches[3])) {
11487 12
                    $cp = (int) \hexdec($matches[3]);
11488
                } else {
11489
                    // http://unicode.org/faq/utf_bom.html#utf16-4
11490
                    $cp = ((int) \hexdec($matches[1]) << 10)
11491
                          + (int) \hexdec($matches[2])
11492
                          + 0x10000
11493
                          - (0xD800 << 10)
11494
                          - 0xDC00;
11495
                }
11496
11497
                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
11498
                //
11499
                // php_utf32_utf8(unsigned char *buf, unsigned k)
11500
11501 12
                if ($cp < 0x80) {
11502 8
                    return (string) self::chr($cp);
11503
                }
11504
11505 9
                if ($cp < 0xA0) {
11506
                    /** @noinspection UnnecessaryCastingInspection */
11507
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
11508
                }
11509
11510 9
                return self::decimal_to_chr($cp);
11511 41
            },
11512 41
            $buf
11513
        );
11514
11515 41
        if ($buf === null) {
11516
            return '';
11517
        }
11518
11519
        // decode UTF-8 codepoints
11520 41
        if ($decodeHtmlEntityToUtf8 === true) {
11521 2
            $buf = self::html_entity_decode($buf);
11522
        }
11523
11524 41
        return $buf;
11525
    }
11526
11527
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped, null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check: the char list "0" is
        // falsy in PHP and was silently handled like "no chars given" before.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = "^[\s]+|[\s]+\$";
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback when the mbstring support flag is not set
        return self::regex_replace($str, $pattern, '', '', '/');
    }
11560
11561
    /**
     * Returns the string with its first character converted to uppercase.
     *
     * The plain "mb_*" functions are used as long as neither a language nor
     * $tryToKeepStringLength was requested, otherwise "UTF8::strtoupper()" does
     * the case conversion of the first character.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            // fast path: no encoding is passed to the "mb_*" calls here
            $rest = (string) \mb_substr($str, 1);
            $first = (string) \mb_substr($str, 0, 1);

            $first = $useMbFunction === true
                ? \mb_strtoupper($first)
                : self::strtoupper($first, $encoding, false, $lang, $tryToKeepStringLength);

            return $first . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($useMbFunction === true) {
            $first = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $first . $rest;
    }
11630
11631
    /**
     * Alias of "UTF8::ucfirst()".
     *
     * Only the first three arguments of "ucfirst()" are forwarded.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
11646
11647
    /**
     * Uppercase for all words in the string.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // Compare against the empty string explicitly: "!$str" is also true for
        // the string "0", which was wrongly turned into an empty result.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the native function is only an option if neither a charlist nor exceptions were given
        $usePhpDefaultFunctions = !(bool) ($charlist . \implode('', $exceptions));

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // nothing to uppercase in empty parts (a lone "0" is skipped too and stays unchanged)
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference, so that $word can't modify the last element by accident
        unset($word);

        return \implode('', $words);
    }
11708
11709
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without one of these markers there is nothing to url- / entity- / unicode-decode
        $needsDecoding = \strpos($str, '&') !== false
                         ||
                         \strpos($str, '%') !== false
                         ||
                         \strpos($str, '+') !== false
                         ||
                         \strpos($str, '\u') !== false;

        if (!$needsDecoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding pass per iteration, repeated until the string stops changing
        // (or only once, if $multi_decode is disabled)
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entityDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($entityDecoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
11766
11767
    /**
     * Returns an array with "urlencoded"-win1252 -> UTF-8.
     *
     * The keys are the percent-encoded bytes "%20" - "%FF" (uppercase hex), the values
     * are the corresponding characters as UTF-8 strings.
     *
     * - "%80" maps to the euro sign (it was mapped to a backtick before, which is "%60").
     * - Invisible characters ("%7F", "%A0", "%AD") are written as escape sequences, so that
     *   they can't get lost in editors or copy & paste.
     * - "%81", "%8D", "%8F", "%90" and "%9D" are not assigned in WINDOWS-1252 and still map
     *   to an empty string.
     *
     * @return string[]
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            // 0x20 - 0x2F
            '%20' => ' ', '%21' => '!', '%22' => '"', '%23' => '#', '%24' => '$', '%25' => '%', '%26' => '&', '%27' => "'",
            '%28' => '(', '%29' => ')', '%2A' => '*', '%2B' => '+', '%2C' => ',', '%2D' => '-', '%2E' => '.', '%2F' => '/',
            // 0x30 - 0x3F
            '%30' => '0', '%31' => '1', '%32' => '2', '%33' => '3', '%34' => '4', '%35' => '5', '%36' => '6', '%37' => '7',
            '%38' => '8', '%39' => '9', '%3A' => ':', '%3B' => ';', '%3C' => '<', '%3D' => '=', '%3E' => '>', '%3F' => '?',
            // 0x40 - 0x4F
            '%40' => '@', '%41' => 'A', '%42' => 'B', '%43' => 'C', '%44' => 'D', '%45' => 'E', '%46' => 'F', '%47' => 'G',
            '%48' => 'H', '%49' => 'I', '%4A' => 'J', '%4B' => 'K', '%4C' => 'L', '%4D' => 'M', '%4E' => 'N', '%4F' => 'O',
            // 0x50 - 0x5F
            '%50' => 'P', '%51' => 'Q', '%52' => 'R', '%53' => 'S', '%54' => 'T', '%55' => 'U', '%56' => 'V', '%57' => 'W',
            '%58' => 'X', '%59' => 'Y', '%5A' => 'Z', '%5B' => '[', '%5C' => '\\', '%5D' => ']', '%5E' => '^', '%5F' => '_',
            // 0x60 - 0x6F
            '%60' => '`', '%61' => 'a', '%62' => 'b', '%63' => 'c', '%64' => 'd', '%65' => 'e', '%66' => 'f', '%67' => 'g',
            '%68' => 'h', '%69' => 'i', '%6A' => 'j', '%6B' => 'k', '%6C' => 'l', '%6D' => 'm', '%6E' => 'n', '%6F' => 'o',
            // 0x70 - 0x7F (0x7F === DEL)
            '%70' => 'p', '%71' => 'q', '%72' => 'r', '%73' => 's', '%74' => 't', '%75' => 'u', '%76' => 'v', '%77' => 'w',
            '%78' => 'x', '%79' => 'y', '%7A' => 'z', '%7B' => '{', '%7C' => '|', '%7D' => '}', '%7E' => '~', '%7F' => "\x7F",
            // 0x80 - 0x8F (0x81, 0x8D and 0x8F are unassigned)
            '%80' => '€', '%81' => '', '%82' => '‚', '%83' => 'ƒ', '%84' => '„', '%85' => '…', '%86' => '†', '%87' => '‡',
            '%88' => 'ˆ', '%89' => '‰', '%8A' => 'Š', '%8B' => '‹', '%8C' => 'Œ', '%8D' => '', '%8E' => 'Ž', '%8F' => '',
            // 0x90 - 0x9F (0x90 and 0x9D are unassigned)
            '%90' => '', '%91' => '‘', '%92' => '’', '%93' => '“', '%94' => '”', '%95' => '•', '%96' => '–', '%97' => '—',
            '%98' => '˜', '%99' => '™', '%9A' => 'š', '%9B' => '›', '%9C' => 'œ', '%9D' => '', '%9E' => 'ž', '%9F' => 'Ÿ',
            // 0xA0 - 0xAF (0xA0 === no-break space, 0xAD === soft hyphen)
            '%A0' => "\u{A0}", '%A1' => '¡', '%A2' => '¢', '%A3' => '£', '%A4' => '¤', '%A5' => '¥', '%A6' => '¦', '%A7' => '§',
            '%A8' => '¨', '%A9' => '©', '%AA' => 'ª', '%AB' => '«', '%AC' => '¬', '%AD' => "\u{AD}", '%AE' => '®', '%AF' => '¯',
            // 0xB0 - 0xBF
            '%B0' => '°', '%B1' => '±', '%B2' => '²', '%B3' => '³', '%B4' => '´', '%B5' => 'µ', '%B6' => '¶', '%B7' => '·',
            '%B8' => '¸', '%B9' => '¹', '%BA' => 'º', '%BB' => '»', '%BC' => '¼', '%BD' => '½', '%BE' => '¾', '%BF' => '¿',
            // 0xC0 - 0xCF
            '%C0' => 'À', '%C1' => 'Á', '%C2' => 'Â', '%C3' => 'Ã', '%C4' => 'Ä', '%C5' => 'Å', '%C6' => 'Æ', '%C7' => 'Ç',
            '%C8' => 'È', '%C9' => 'É', '%CA' => 'Ê', '%CB' => 'Ë', '%CC' => 'Ì', '%CD' => 'Í', '%CE' => 'Î', '%CF' => 'Ï',
            // 0xD0 - 0xDF
            '%D0' => 'Ð', '%D1' => 'Ñ', '%D2' => 'Ò', '%D3' => 'Ó', '%D4' => 'Ô', '%D5' => 'Õ', '%D6' => 'Ö', '%D7' => '×',
            '%D8' => 'Ø', '%D9' => 'Ù', '%DA' => 'Ú', '%DB' => 'Û', '%DC' => 'Ü', '%DD' => 'Ý', '%DE' => 'Þ', '%DF' => 'ß',
            // 0xE0 - 0xEF
            '%E0' => 'à', '%E1' => 'á', '%E2' => 'â', '%E3' => 'ã', '%E4' => 'ä', '%E5' => 'å', '%E6' => 'æ', '%E7' => 'ç',
            '%E8' => 'è', '%E9' => 'é', '%EA' => 'ê', '%EB' => 'ë', '%EC' => 'ì', '%ED' => 'í', '%EE' => 'î', '%EF' => 'ï',
            // 0xF0 - 0xFF
            '%F0' => 'ð', '%F1' => 'ñ', '%F2' => 'ò', '%F3' => 'ó', '%F4' => 'ô', '%F5' => 'õ', '%F6' => 'ö', '%F7' => '÷',
            '%F8' => 'ø', '%F9' => 'ù', '%FA' => 'ú', '%FB' => 'û', '%FC' => 'ü', '%FD' => 'ý', '%FE' => 'þ', '%FF' => 'ÿ',
        ];
    }
12003
12004
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Two byte sequences are decoded via the "ord" / "chr" lookup tables, everything
     * that does not fit into one byte (code point >= 256 as well as every three or four
     * byte sequence) is replaced by "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>If true, the untouched input is returned whenever the decoded
     *                              string is not shorter ("UTF8::strlen()") than the input.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // keep the input for the comparison at the end
        $original = $str;
        $byteCount = \strlen($str);

        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $placeholder = '?';

        // The string is rewritten in place: $write never overtakes $read,
        // so bytes that still have to be read are never overwritten.
        /** @noinspection ForeachInvariantsInspection */
        for ($read = 0, $write = 0; $read < $byteCount; ++$read, ++$write) {
            $highNibble = $str[$read] & "\xF0";

            if ($highNibble === "\xC0" || $highNibble === "\xD0") {
                // two byte sequence: 5 bits from the lead byte + 6 bits from the next byte
                $codePoint = (self::$ORD[$str[$read] & "\x1F"] << 6) | self::$ORD[$str[++$read] & "\x3F"];
                $str[$write] = $codePoint < 256 ? self::$CHR[$codePoint] : $placeholder;
            } elseif ($highNibble === "\xE0" || $highNibble === "\xF0") {
                // three / four byte sequences: replace with the placeholder and
                // skip the remaining two / three bytes of the sequence
                $str[$write] = $placeholder;
                $read += $highNibble === "\xF0" ? 3 : 2;
            } else {
                // every other byte is copied unchanged
                $str[$write] = $str[$read];
            }
        }

        // cut off the leftover bytes behind the last written position
        $decoded = \substr($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($decoded) >= (int) self::strlen($original)
        ) {
            return $original;
        }

        return $decoded;
    }
12073
12074
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around the native "utf8_encode()" that always returns a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12098
12099
    /**
     * Fix -> utf8-win1252 chars.
     *
     * Kept for backward compatibility only, the call is forwarded to
     * "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12112
12113
    /**
     * Returns the internal table of utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12126
12127
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. If the string contains more
     * than $limit words, it is cut after the last allowed word, right-trimmed and
     * $strAddOn is appended, otherwise the string is returned unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string an empty string for an empty input or a limit below 1
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace + up to $limit words (each with its trailing whitespace)
        $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // no match at all => nothing to cut
        if (!isset($matches[0])) {
            return $str;
        }

        // The match is anchored at the start of the string, hence it is a prefix of it:
        // if it is as long as the whole string, then there is nothing to cut either.
        $head = $matches[0];
        if ($head === $str) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
12154
12155
    /**
     * Wraps a string to a given number of characters.
     *
     * The string is split into characters via "UTF8::str_split()" and translated into a
     * single-byte "mask" (" " for a space, "?" for every other character, "#" for an
     * existing break). The native "wordwrap()" then runs on that mask and its break
     * positions are transferred back to the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column, an empty string if
     *                $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // build the list of real characters and the matching single-byte mask
        $glyphs = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                // an already existing break between two segments
                $glyphs[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $glyph) {
                $glyphs[] = $glyph;
                $mask .= $glyph === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $glyphIndex = 0;
        $maskIndex = -1;
        $breakPos = -1;

        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // move everything in front of the break into the result
            for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
                $wrapped .= $glyphs[$glyphIndex];
                unset($glyphs[$glyphIndex++]);
            }

            // the break replaces an existing break or the space at this position
            if (
                $glyphs[$glyphIndex] === $break
                ||
                $glyphs[$glyphIndex] === ' '
            ) {
                unset($glyphs[$glyphIndex++]);
            }

            $wrapped .= $break;
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $glyphs);
    }
12225
12226
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split at "\r\n", "\r" and "\n", every line is wrapped on its own
     * via "UTF8::wordwrap()" (counting characters instead of bytes, so multi-byte
     * characters no longer count as several columns) and terminated with "\n".
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width for every single line.</p>
     *
     * @return string the wrapped lines, each followed by "\n"; an empty string if the input can't be split
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            // "preg_split()" failed: there is no line that could be wrapped
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
12250
12251
    /**
     * Returns the internal list of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12260
12261
    /**
     * Replaces the characters from the case folding tables in the given string.
     *
     * The "common" table is always applied, the "full" table ("caseFolding_full" data
     * file, loaded once and cached in a static variable) only on request.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        // default direction: lower -> upper
        $search = self::$COMMON_CASE_FOLD['lower'];
        $replace = self::$COMMON_CASE_FOLD['upper'];

        if ($useLower === true) {
            // reversed direction: upper -> lower
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        }

        $str = \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 for lowercase, the other way around for uppercase
        $fullSearch = $useLower === true ? $FULL_CASE_FOLD[0] : $FULL_CASE_FOLD[1];
        $fullReplace = $useLower === true ? $FULL_CASE_FOLD[1] : $FULL_CASE_FOLD[0];

        return \str_replace($fullSearch, $fullReplace, $str);
    }
12302
12303
    /**
     * Loads a data file from "/data/*.php" (relative to this file) and returns
     * the value that the included file returns.
     *
     * @param string $file <p>The name of the data file, without directory and without ".php".</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12317
12318
    /**
     * Loads a data file from "/data/*.php" (relative to this file), but only if
     * the file exists.
     *
     * @param string $file <p>The name of the data file, without directory and without ".php".</p>
     *
     * @return false|mixed the value returned by the included file, false if the file does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12336
12337
    /**
     * Build the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file when it is not loaded yet, sorts it so that keys with
     * more bytes come first, and fills the key, value and reversible-placeholder caches.
     *
     * @return true|null true when this call built the caches, null when they were already built
     */
    private static function initEmojiData()
    {
        // nothing to do once the key cache exists
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order by key byte length, longest first
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder per key, built from the key's crc32 and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = (string) \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12367
12368
    /**
     * Checks whether the mbstring string-function "overload" is active on the server.
     *
     * @return bool true when MB_OVERLOAD_STRING is defined and its bit is set in "mbstring.func_overload"
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing to test the INI value against
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($overloadFlags & \MB_OVERLOAD_STRING) !== 0;
    }
12385
12386
    /**
     * Filter a list of strings, dropping values that are too short and/or blank.
     *
     * @param array    $strings           <p>The strings to filter.</p>
     * @param bool     $removeEmptyValues <p>Drop values that are empty after trim().</p>
     * @param int|null $removeShortValues <p>Drop values whose mb_strlen() is less than or equal to this number;
     *                                    null disables the length check.</p>
     *
     * @return array the remaining values in their original order, re-indexed from 0
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        // init
        $return = [];

        // iterate by value: nothing is written back into $strings, and no reference is left dangling
        foreach ($strings as $str) {
            if (
                $removeShortValues !== null
                &&
                \mb_strlen($str) <= $removeShortValues
            ) {
                continue;
            }

            if (
                $removeEmptyValues === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12420
12421
    /**
     * Build a regular-expression fragment that matches any one of the characters in $s.
     *
     * The result is either a single character class ("[...]") or, when some parts cannot
     * go into the class, a non-capturing alternation ("(?:...|...)"). Results are cached
     * per combination of $s and $class.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the character class (inserted as-is, not quoted).</p>
     *
     * @return string the regex fragment (without delimiters)
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        // index 0 collects the character-class content, further entries are alternatives
        $classArray = [$class];

        // iterate by value with its own variable, so the $s parameter stays untouched
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes first so it cannot form a range
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // at most two bytes: quote it for use between "/" delimiters
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character of three or more bytes: add it unquoted
                $classArray[0] .= $char;
            } else {
                $classArray[] = $char;
            }
        }

        // compare against the empty string: a plain truthiness test would skip the class "0"
        if ($classArray[0] !== '') {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12469
12470
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part, except for
     * parts that are a known name particle (e.g. "von", "de") or that start with a known prefix
     * (e.g. "mc", "d'"). When the delimiter is "-", parts that merely start with a name particle
     * are left untouched as well.
     *
     * @param string $names     <p>The name(s) to fix.</p>
     * @param string $delimiter <p>The separator between the name parts; an empty delimiter yields ''.</p>
     * @param string $encoding  <p>Encoding passed on to the prefix lookup.</p>
     *
     * @return string the parts joined again with $delimiter
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // explode() rejects an empty delimiter (false + warning before PHP 8, ValueError since PHP 8),
        // so handle that case explicitly and keep returning an empty string for it
        if ($delimiter === '') {
            return '';
        }

        // init
        $namesArray = \explode($delimiter, $names);

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that keep a part untouched; the name particles only count for "-" as delimiter
        $beginnings = $delimiter === '-'
            ? \array_merge($specialCases['names'], $specialCases['prefixes'])
            : $specialCases['prefixes'];

        foreach ($namesArray as &$name) {
            // exact name particles stay as they are
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $keepAsIs = false;
            foreach ($beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $keepAsIs = true;

                    break;
                }
            }

            if ($keepAsIs) {
                continue;
            }

            // NOTE(review): $encoding is not forwarded to ucfirst() here - confirm whether it should be
            $name = self::ucfirst($name);
        }
        unset($name);

        return \implode($delimiter, $namesArray);
    }
12560
12561
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * Decomposes the string (Unicode normalization form D) and then removes all
     * non-spacing marks (\p{Mn}), so that e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null the folded string, '' when the string cannot be normalized,
     *                     or null when the regular expression fails
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure with false; passing that on to preg_replace()
        // would raise a TypeError under strict_types
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace(
            '/\p{Mn}+/u',
            '',
            $decomposed
        );
    }
12576
12577
    /**
     * Convert one input character to UTF-8, using the Windows-1252 special-case table where it applies.
     *
     * @param int|string $input <p>The character to convert; it must be a key of the "ord" data table
     *                          (presumably a single byte - confirm against the callers).</p>
     *
     * @return string the UTF-8 byte sequence for the character
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        // lazy-load the lookup tables from the data directory
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // build a two-byte sequence by hand, with bitwise operators on strings (byte-wise):
            // the lead byte comes from the value divided by 64, or-ed with 0xC0 ...
            // (explicit int cast: a fractional float as array key is deprecated since PHP 8.1)
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // ... and the second byte is the low six bits of the input, or-ed with 0x80
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12610
12611
    /**
     * Turn "%uXXXX" escapes (three or four hex digits) into HTML hex entities ("&#xXXXX;").
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input with every "%u" escape replaced, or the input unchanged when it has none
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        // no escape sequence found: hand the input back as it is
        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
12625
}
12626