Passed
Push — master ( 148a01...cdece5 )
by Lars
08:38
created

UTF8::substr_left()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 15
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 4

Importance

Changes 2
Bugs 2 Features 0
Metric Value
cc 4
eloc 7
c 2
b 2
f 0
nc 4
nop 2
dl 0
loc 15
ccs 7
cts 7
cp 1
crap 4
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Bom => Byte-Length
     *
     * Maps every known byte order mark to its length in bytes. Each mark is
     * listed twice: once as its raw bytes and once as the UTF-8 bytes that
     * result when those raw bytes were mis-read as "WINDOWS-1252".
     *
     * All keys are written as byte escapes so the table does not depend on the
     * encoding this source file is stored or transferred in (a literal key can
     * silently degrade to an empty string, which would then claim a length of 6).
     *
     * The 4-byte UTF-32 marks are listed before the 2-byte UTF-16 marks they
     * start with; keep that order.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
241
     * __construct()
242
     */
243 32
    public function __construct()
244
    {
245 32
    }
246
247
    /**
     * Return the character at the specified position: $str[1] like functionality.
     *
     * An empty string or a negative position yields an empty string.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The zero-based position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $nothingToRead = $str === '' || $pos < 0;
        if ($nothingToRead) {
            return '';
        }

        // UTF-8 goes straight to mbstring (using its internal encoding);
        // every other charset is delegated to self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
268
269
    /**
     * Prepends the UTF-8 BOM to the string and returns the result.
     *
     * INFO: If self::string_has_bom() already reports a BOM, the input is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
286
287
    /**
     * Changes the case of all keys in an array.
     *
     * Values are copied unchanged. If two keys become identical after case
     * conversion, the later element overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value is treated
     *                         as CASE_LOWER</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array an array with its keys lower or uppercased
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // anything that is not an explicit CASE_UPPER falls back to lower-casing
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the values are only read, so a by-reference loop
        // would just leave a dangling reference behind.
        foreach ($array as $key => $value) {
            $convertedKey = $toUpper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$convertedKey] = $value;
        }

        return $return;
    }
318
319
    /**
     * Returns the substring between $start and $end, if found, or an empty
     * string. An optional offset may be supplied from which to begin the
     * search for the start string.
     *
     * An empty string is also returned when $end directly follows $start.
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // The fast path is chosen from the encoding exactly as passed in;
        // only the slow path normalizes the encoding name.
        $useMbDirectly = $encoding === 'UTF-8';
        if (!$useMbDirectly) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $startPos = $useMbDirectly
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        // first character after the start delimiter
        $contentFrom = $startPos + (int) (
            $useMbDirectly
                ? \mb_strlen($start)
                : self::strlen($start, $encoding)
        );

        $endPos = $useMbDirectly
            ? \mb_strpos($str, $end, $contentFrom)
            : self::strpos($str, $end, $contentFrom, $encoding);
        if ($endPos === false || $endPos === $contentFrom) {
            return '';
        }

        $contentLength = $endPos - $contentFrom;

        if ($useMbDirectly) {
            return (string) \mb_substr($str, $contentFrom, $contentLength);
        }

        return (string) self::substr($str, $contentFrom, $contentLength, $encoding);
    }
382
383
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are read as one base-2 number, converted to hexadecimal and
     * packed into bytes. A value of zero, and any input without a first
     * character, yields an empty string.
     *
     * NOTE: the conversion goes through base_convert(), which the PHP manual
     * documents as losing precision on large numbers.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // pack('H*') reads the hex digits high nibble first, so a lone leading
        // digit would land in the high nibble ("1" => "\x10"). Left-pad to a
        // whole number of bytes so that "1" => "\x01".
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
403
404
    /**
     * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
415
416
    /**
     * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
     *
     * @alias of UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
430
431
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * For "UTF-8" the lookup is done with mb_substr() directly; any other
     * encoding is delegated to self::substr().
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
448
449
    /**
     * Returns an array consisting of the characters in the string.
     *
     * Thin wrapper around self::str_split() called with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
460
461
    /**
     * This method will auto-detect your server environment for UTF-8 support.
     *
     * On the first call it fills self::$SUPPORT with one flag per probed
     * extension / feature and, when mbstring (or the symfony polyfill) is
     * available, switches the mbstring internal encoding to UTF-8. Later
     * calls do nothing.
     *
     * @return true|null true when the detection ran, null when it had already been done
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides the "mb_" functions as well, so configure it the same way
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Lookup order: the per-call cache, the byte table for code points up to
     * 127, IntlChar::chr() when the intl extension provides it, and finally a
     * manual UTF-8 byte assembly. Results are cached per code point / encoding
     * pair in a function-static array.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure, e.g. for a numeric
     *                     code point outside the Unicode range 0 .. 0x10FFFF
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Reject numeric code points outside the Unicode range up front. The
        // manual fallback below would otherwise assemble bytes beyond U+10FFFF
        // or index the byte table with a negative offset, while the IntlChar
        // path reports failure with null.
        if (
            \is_numeric($code_point)
            &&
            ((int) $code_point < 0 || (int) $code_point > 0x10FFFF)
        ) {
            return null;
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            //
            // fallback via "IntlChar"
            //

            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            //
            // fallback via vanilla php
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            $code_point = (int) $code_point;

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            if ($code_point <= 0x7F) {
                // 1 byte: 0xxxxxxx
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // every path produced UTF-8 so far; convert once if something else was requested
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
629
630
    /**
     * Applies callback to all characters of a string.
     *
     * The string is split with self::str_split() and the pieces are passed
     * through array_map().
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
645
646
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character; empty for an empty string
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byteLength = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byteLength = '\strlen';
        }

        return \array_map($byteLength, self::str_split($str));
    }
676
677
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes are decoded (1 to 4). A first byte
     * that is not a valid UTF-8 lead byte is returned as-is. Only the first
     * character of $char is looked at.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the decoded code point; 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        // reading $char[0] of an empty string is an "uninitialized string offset"
        if ($char === '') {
            return 0;
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // strip the length marker from the lead byte, keep its payload bits
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 1; $i < $bytes; ++$i) {
            // 10xxxxxx
            // a truncated sequence contributes zero bits for each missing byte
            // instead of reading past the end of the string
            $continuation = isset($char[$i]) ? self::ord($char[$i]) : 0;
            $code = ($code << 6) + ($continuation & ~0x80);
        }

        return $code;
    }
715
716
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty string is returned for an empty $char. The literal "&#0;" is
     * converted as if it were an empty character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $subject = $char === '&#0;' ? '' : (string) $char;
        $codePoint = self::ord($subject);

        return self::int_to_hex($codePoint, $pfix);
    }
736
737
    /**
     * Alias for "UTF8::chr_to_decimal()": the argument is forwarded unchanged.
     *
     * @param string $chr
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
     *
     * The chunks come from self::str_split() and are joined with $end, so $end
     * is placed between chunks only and not after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The byte-level filter always runs first. The optional steps then run in
     * this fixed order: diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word
     *                                              chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond
     *                                              question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
     *                                              invisible characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; groups 2 and 3 match
        // a single stray byte. Replacing every match with group 1 keeps the
        // valid runs and drops the stray bytes.
        $utf8Filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8Filter, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
833
834
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * The input is cast to string, passed through self::fix_simple_utf8() and
     * then through self::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // ... do not normalize MS Word chars
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
868
869
    /**
     * Accepts a string or a array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is first split with self::str_split(); an array is used as
     * given. Every element is then converted with self::ord().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $characters = \is_string($arg) === true
            ? self::str_split($arg)
            : $arg;

        $codePoints = \array_map([self::class, 'ord'], $characters);

        if (\count($codePoints) === 0) {
            return [];
        }

        if ($u_style !== true) {
            return $codePoints;
        }

        // U+xxxx notation requested
        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
913
914
    /**
     * Trims the string and condenses every run of whitespace into one single space.
     * Tabs and newlines count as whitespace, and so does multibyte whitespace
     * such as the thin space and the ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the trimmed $str with condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $spacePattern = '[[:space:]]+';

        // Without mbstring, fall back to the class' own regex helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, $spacePattern, ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = \mb_ereg_replace($spacePattern, ' ', $str);

        return \trim((string) $collapsed);
    }
932
933
    /**
     * Counts how often each character occurs in a string.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Handed on unchanged to UTF8::str_split();
     *                                   set it to false if that call should not rely on the
     *                                   mb_* functions.</p>
     *
     * @return int[] an associative array with the characters as keys and
     *               their number of occurrences as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // Split into chunks of length 1, then tally identical chunks.
        $chars = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($chars);
    }
957
958
    /**
     * Strips css media-query blocks ("@media ... { ... }") out of a string.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string the input without the matched media-query blocks
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        // preg_replace() gives null on error; the cast turns that into ''.
        return (string) $stripped;
    }
973
974
    /**
     * Tells whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
984
985
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * @param mixed $int <p>The decimal code point.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        // Build the numeric entity "&#<int>;" and let the entity decoder resolve it.
        $numericEntity = '&#' . $int . ';';
        $flags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($numericEntity, $flags);
    }
996
997
    /**
     * Decodes a MIME header field.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // 'UTF-8' and 'CP850' are used verbatim, everything else gets normalized.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Preferred path: iconv decodes straight into the requested charset.
        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // Fallback: run non-UTF-8 requests through UTF8::encode() first,
        // then let mbstring decode the header.
        $header = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($header);
    }
1023
1024
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // Pick the placeholder list that matches the mapping used while encoding.
        $placeholders = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Swap every placeholder for its cached value.
        return (string) \str_replace(
            (array) $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1052
1053
    /**
     * Encodes a string with emoji chars into a non-emoji string.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // Choose the placeholder list the cached values are replaced with.
        $placeholders = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Swap every cached value for its placeholder.
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $placeholders,
            $str
        );
    }
1081
1082
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so it can be called on a string that already is UTF-8 without messing it up.
     *
     * Besides real charsets, the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled
     * for both the source and the target side.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>When true, the current encoding of the string is
     *                                       auto-detected; a detected encoding replaces $fromEncoding, and if
     *                                       nothing is detected the string is passed to UTF8::to_utf8().</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       An empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the target is 'JSON' and the string cannot be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        // Nothing to convert, or no target given.
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are taken verbatim; any other name is normalized first.
        if (!\in_array($toEncoding, ['UTF-8', 'CP850'], true)) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }
        if ($fromEncoding && !\in_array($fromEncoding, ['UTF-8', 'CP850'], true)) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // Source and target are known and identical.
        if ($fromEncoding === $toEncoding && $toEncoding && $fromEncoding) {
            return $str;
        }

        // Pseudo encodings. The order matters: a "from" decode happens before
        // the next "to" check, so e.g. JSON input can be returned as BASE64.
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // Detect the source encoding when asked to, or when none is known.
        $detected = ($autodetectFromEncoding === true || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        if ($detected !== false) {
            $fromEncoding = $detected;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // Still no source encoding, or it already equals the target.
        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        $fromSingleByte = \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true);
        if ($toEncoding === 'UTF-8' && $fromSingleByte) {
            return self::to_utf8($str);
        }

        $fromCp1252OrUtf8 = \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true);
        if ($toEncoding === 'ISO-8859-1' && $fromCp1252OrUtf8) {
            return self::to_iso8859($str);
        }

        // Warn when the target is none of the three special-cased charsets and mbstring is missing.
        if (
            self::$SUPPORT['mbstring'] === false
            &&
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            // A falsy result falls through to the iconv attempt below.
            if ($converted) {
                return $converted;
            }
        }

        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);
        if ($viaIconv !== false) {
            return $viaIconv;
        }

        // Every conversion attempt failed: hand back the (possibly decoded) input.
        return $str;
    }
1240
1241
    /**
     * Encodes a string as a MIME header field value (wrapper around "iconv_mime_encode()"
     * with an empty field name).
     *
     * @param string $str              <p>The value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed (passed as "line-break-chars"),
     *                                 default: CRLF.</p>
     * @param int    $indent           [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        // Must be double-quoted: the previous single-quoted '\\r\\n' was the four literal
        // characters backslash, "r", backslash, "n" instead of a real CRLF line break.
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are used verbatim, everything else gets normalized.
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1281
1282
    /**
     * Creates an extract from a sentence; if the search-string is found, the extract
     * is built around its position.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // For 'UTF-8' the native mb_* functions are called directly (without an encoding
        // argument); every other encoding goes through the class' own wrappers.
        $useMb = $encoding === 'UTF-8';

        // Characters stripped from the cut edges of the extract.
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Length of a string in characters.
        $charLength = static function ($text) use ($useMb, $encoding) {
            return (int) ($useMb ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        // Sub-string by character position (may be false for the wrapper variant).
        $slice = static function ($text, $start, $count) use ($useMb, $encoding) {
            return $useMb
                ? \mb_substr($text, $start, $count)
                : self::substr($text, $start, $count, $encoding);
        };

        // min() of the next ' ' and the next '.' position at or after $offset.
        // NOTE(review): a "not found" (false) seems to win inside min(), so a missing
        // delimiter ends up as 0 after the int cast - confirm that this is intended.
        $nextBreak = static function ($offset) use ($str, $useMb, $encoding) {
            return $useMb
                ? \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
                : \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding));
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // ------------------------------------------------------------------
        // No search term: cut the text after roughly $length characters.
        // ------------------------------------------------------------------
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $end = \min($length - 1, $charLength($str));
            }

            $pos = (int) $nextBreak($end);
            if (!$pos) {
                return $str;
            }

            $head = $slice($str, 0, $pos);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trimChars) . $replacerForSkippedText;
        }

        // ------------------------------------------------------------------
        // Search term given: locate it and work out where the extract starts.
        // ------------------------------------------------------------------
        $wordPos = $useMb
            ? (int) \mb_stripos($str, $search)
            : (int) self::stripos($str, $search, 0, $encoding);
        $halfSide = (int) ($wordPos - $length / 2 + $charLength($search) / 2);

        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = $slice($str, 0, $halfSide);
            if ($halfText !== false) {
                // Start at the last ' ' or '.' in front of the computed half-way point.
                $pos_start = $useMb
                    ? (int) \max(\mb_strrpos($halfText, ' '), \mb_strrpos($halfText, '.'))
                    : (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
            }
        }

        if ($wordPos && $halfSide > 0) {
            // The extract starts somewhere inside the text.
            $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));
            $pos_end = (int) $nextBreak($offset) - $pos_start;

            if ($pos_end <= 0) {
                // No usable end position: take everything from the start position on.
                $tail = $slice($str, $pos_start, $charLength($str));

                return $tail !== false
                    ? $replacerForSkippedText . \ltrim($tail, $trimChars)
                    : '';
            }

            $middle = $slice($str, $pos_start, $pos_end);

            return $middle !== false
                ? $replacerForSkippedText . \trim($middle, $trimChars) . $replacerForSkippedText
                : '';
        }

        // The extract starts at the beginning of the text.
        $offset = \min($length - 1, (int) self::strlen($str, $encoding));
        $pos_end = (int) $nextBreak($offset);
        if (!$pos_end) {
            return $str;
        }

        $head = $slice($str, 0, $pos_end);

        return $head !== false
            ? \rtrim($head, $trimChars) . $replacerForSkippedText
            : '';
    }
1469
1470
    /**
     * Reads an entire file into a string and (optionally) converts the content to UTF-8.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed on as the second argument of the native
     *                                        file_get_contents() (include path search).
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. When it is null and $timeout is
     *                                        set, a context with that "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null means 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        An empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data as string or <b>false</b> on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated in newer PHP versions - confirm
        // which sanitizing is really wanted for file names before replacing it.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // Only build our own context when the caller did not bring one.
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // The length argument is only passed when it really is an int.
        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // Convert unless the data looks binary; UTF-16 / UTF-32 content is converted as well.
        $shouldConvert = self::is_binary($data, true) !== true
                         ||
                         self::is_utf16($data, false) !== false
                         ||
                         self::is_utf32($data, false) !== false;

        if ($shouldConvert) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1564
1565
    /**
     * Checks whether the content of a file starts with a BOM (Byte Order Mark).
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        // The native function is used on purpose here: the raw bytes are needed.
        $rawContent = \file_get_contents($file_path);

        if ($rawContent !== false) {
            return self::string_has_bom($rawContent);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1584
1585
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized and
     * every other type is returned untouched.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>One of the "\Normalizer" form constants.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which starts
     *                                   with a combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // Containers: filter every element in place.
        if ($type === 'array' || $type === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization.
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty marker, so that the isset() check below passes.
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // No usable result from the normalizer: re-encode the input as UTF-8 instead.
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($normalized[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1651
1652
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // $options is only forwarded when the caller actually passed a fourth argument.
        $var = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($var);
    }
1694
1695
    /**
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Fetches external variables, optionally filters them via the native
     * "filter_input_array()" and then passes the result through "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" (the filter type), "flags" (any flags that
     *                          apply to the filter) and "options" (any options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values
     *                          in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With only $type given, let the native function use its own defaults.
        $onlyTypeGiven = \func_num_args() < 2;

        return self::filter(
            $onlyTypeGiven
                ? \filter_input_array($type)
                : \filter_input_array($type, $definition, $add_empty)
        );
    }
1740
1741
    /**
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with the native "filter_var()" and then passes the
     * result through "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If the filter
     *                        accepts options, flags can be provided in the "flags" field of the array. For
     *                        the "callback" filter, a callable should be passed. The callback must accept
     *                        one argument, the value to be filtered, and return the value after
     *                        filtering/sanitizing it.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // filters that accept options use this format
     *                        $options = array(
     *                        'options' => array(
     *                        'default' => 3, // value to return if the filter fails
     *                        'min_range' => 0
     *                        ),
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                        // filters that only accept flags can receive them directly ...
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        // ... or as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                        // callback validate filter, expected format: "Surname, GivenNames"
     *                        function foo($value)
     *                        {
     *                        if (strpos($value, ", ") === false) return false;
     *                        list($surname, $givennames) = explode(", ", $value, 2);
     *                        $empty = (empty($surname) || empty($givennames));
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                        return ($empty || $notstrings) ? false : $value;
     *                        }
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only when the caller really supplied it.
        $optionsGiven = \func_num_args() >= 3;

        $filtered = $optionsGiven
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1809
1810
    /**
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters multiple variables with the native "filter_var_array()" and then
     * passes the result through "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a
     *                          filter type, or an array optionally specifying the filter, flags and options.
     *                          If the value is an array, valid keys are "filter" (the filter type),
     *                          "flags" (any flags that apply to the filter) and "options" (any options
     *                          that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values
     *                          in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With only $data given, let the native function use its own defaults.
        $onlyDataGiven = \func_num_args() < 2;

        return self::filter(
            $onlyDataGiven
                ? \filter_var_array($data)
                : \filter_var_array($data, $definition, $add_empty)
        );
    }
1853
1854
    /**
     * Tells whether the "finfo" class exists in the current PHP runtime.
     *
     * @return bool
     *              <strong>true</strong> if the class is available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1864
1865
    /**
     * Returns the first $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // "UTF-8" goes straight to mb_substr(); every other encoding is
        // delegated to UTF8::substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1886
1887
    /**
     * Checks that the string has no more characters than the given box size.
     *
     * The length is measured with "UTF8::strlen()".
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @return bool true if string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1899
1900
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The fix is a plain "str_replace()" of every key of the "utf8_fix" data-set
     * by its value; the search/replace lists are built once and then re-used.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // built on the first non-empty call, kept for all following calls
        static $REPLACEMENT_CACHE = null;

        if ($REPLACEMENT_CACHE === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $REPLACEMENT_CACHE = [
                'search'  => \array_keys(self::$BROKEN_UTF8_FIX),
                'replace' => \array_values(self::$BROKEN_UTF8_FIX),
            ];
        }

        return \str_replace($REPLACEMENT_CACHE['search'], $REPLACEMENT_CACHE['replace'], $str);
    }
1933
1934
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed element by element (recursively, keys are kept).
     * A string is run through "UTF8::utf8_decode()" + "UTF8::to_utf8()" again
     * and again until a pass no longer changes it.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $item) {
                $str[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // stop as soon as one more decode/encode round-trip is a no-op
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($current, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1976
1977
    /**
     * Get the text direction of a specific character.
     *
     * When "IntlChar" support is flagged as available, its direction code is
     * consulted first; if that code is in neither list below (or "IntlChar" is
     * not available) the decision falls back to a built-in table of
     * right-to-left code points.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltrCodes = [0, 11, 12, 20];
            $rtlCodes = [1, 13, 14, 15, 21];

            if (\in_array($intlDirection, $ltrCodes, true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, $rtlCodes, true)) {
                return 'RTL';
            }
        }

        $c = self::chr_to_decimal($char);

        // everything outside of the table's overall span is left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // single right-to-left code points
        $rtlPoints = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtlPoints, true)) {
            return 'RTL';
        }

        // inclusive right-to-left code point ranges: [first, last]
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2090
2091
    /**
     * Check for php-support.
     *
     * Without a key the support-"array" is returned as it currently is. With a
     * key, the transliterator-list is loaded on first use and stored under
     * "intl__transliterator_list_ids" before the key is looked up.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key === null) {
            return self::$SUPPORT;
        }

        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        if (isset(self::$SUPPORT[$key])) {
            return self::$SUPPORT[$key];
        }

        return null;
    }
2115
2116
    /**
     * Guess the file type from the first two bytes ("magic number") of the given data.
     *
     * Warning: this method only works for some file-types (png bmp gif tiff jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * The two leading bytes are compared as raw bytes against the known
     * signatures. (Comparing the concatenated decimal byte values is ambiguous:
     * the bytes 0x07 0xAD produce "7" . "173" = "7173", the same code as "GI".)
     *
     * @param string $str
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'<br>
     *               'type' is always "binary" for a recognized signature,
     *               $fallback is returned unchanged otherwise
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $signature = \substr($str, 0, 2);
        if ($signature === false || \strlen($signature) !== 2) {
            // fewer than two bytes: nothing to compare against
            return $fallback;
        }

        // two-byte signature => [ext, mime]
        $knownSignatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'II'       => ['tiff', 'image/tiff'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        if (!isset($knownSignatures[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $knownSignatures[$signature][0],
            'mime' => $knownSignatures[$signature][1],
            'type' => 'binary',
        ];
    }
2224
2225
    /**
     * Builds a random string by picking $length characters out of $possibleChars.
     *
     * Returns an empty string when $possibleChars contains no characters; a
     * $length of zero or less also yields an empty string.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // "UTF-8" uses the plain "mb_" functions, everything else goes through
        // the encoding-aware helpers of this class (after normalizing the name).
        $plainUtf8 = $encoding === 'UTF-8';
        if (!$plainUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $poolSize = $plainUtf8
            ? (int) \mb_strlen($possibleChars)
            : (int) self::strlen($possibleChars, $encoding);
        if ($poolSize === 0) {
            return '';
        }

        $lastIndex = $poolSize - 1;
        $result = '';
        $picked = 0;

        while ($picked < $length) {
            try {
                $index = \random_int(0, $lastIndex);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $lastIndex);
            }

            $char = $plainUtf8
                ? \mb_substr($possibleChars, $index, 1)
                : self::substr($possibleChars, $index, 1, $encoding);

            // only a successfully extracted char counts towards $length
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2286
2287
    /**
     * Builds a unique identifier string.
     *
     * The identifier is seeded with a random integer, the current session id,
     * the "REMOTE_ADDR" / "SERVER_ADDR" server values (when set) and the extra
     * entropy, then passed to "uniqid()" with more-entropy enabled.
     *
     * INFO: the md5-hash is only a fixed-length fingerprint of that value.
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // "random_int()" throws when no source of randomness is available;
        // fall back to "mt_rand()" like "UTF8::get_random_string()" does,
        // instead of letting the exception escape.
        try {
            $randInt = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $randInt = \mt_rand(0, \mt_getrandmax());
        }

        $uniqueHelper = $randInt .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        $uniqueString = \uniqid($uniqueHelper, true);

        if ($md5) {
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2309
2310
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @param string $str
     *
     * @return bool the result of "UTF8::string_has_bom()" for the given string
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2324
2325
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * Uses "mb_ereg_match()" when mbstring support is flagged as available and
     * "UTF8::str_matches_pattern()" with the same pattern otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2341
2342
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * Uses "mb_ereg_match()" when mbstring support is flagged as available and
     * "UTF8::str_matches_pattern()" with the same pattern otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2358
2359
    /**
     * Converts a hexadecimal-value into an UTF-8 character.
     *
     * The value is converted with "hexdec()" and then handed to
     * "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2370
2371
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
     * "U+" or "\u" (e.g. "U+00a7", "\u00a7", "00a7"). Anything else - including
     * values with non-hexadecimal letters like "U+zzzz" - is a failure.
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits are allowed: "intval(..., 16)" silently stops at
        // the first non-hex char, so "zzzz" would otherwise come back as 0.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2395
2396
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string
     *
     * @see UTF8::html_entity_decode()
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2411
2412
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * With mbstring support the work is done by "mb_encode_numericentity()",
     * otherwise every character is encoded via "UTF8::single_chr_html_encode()".
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // keep ASCII => start encoding at the first non-ASCII code point
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // One convmap entry is exactly [start, end, offset, mask]. A stray
            // 5th element was ignored by PHP 7 but makes PHP 8 throw a
            // ValueError ("must have a multiple of 4 elements").
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2468
2469
    /**
2470
     * UTF-8 version of html_entity_decode()
2471
     *
2472
     * The reason we are not using html_entity_decode() by itself is because
2473
     * while it is not technically correct to leave out the semicolon
2474
     * at the end of an entity most browsers will still interpret the entity
2475
     * correctly. html_entity_decode() does not convert entities without
2476
     * semicolons, so we are left with our own little solution here. Bummer.
2477
     *
2478
     * Convert all HTML entities to their applicable characters
2479
     *
2480
     * INFO: opposite to UTF8::html_encode()
2481
     *
2482
     * @see http://php.net/manual/en/function.html-entity-decode.php
2483
     *
2484
     * @param string $str      <p>
2485
     *                         The input string.
2486
     *                         </p>
2487
     * @param int    $flags    [optional] <p>
2488
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2489
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2490
     *                         <table>
2491
     *                         Available <i>flags</i> constants
2492
     *                         <tr valign="top">
2493
     *                         <td>Constant Name</td>
2494
     *                         <td>Description</td>
2495
     *                         </tr>
2496
     *                         <tr valign="top">
2497
     *                         <td><b>ENT_COMPAT</b></td>
2498
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2499
     *                         </tr>
2500
     *                         <tr valign="top">
2501
     *                         <td><b>ENT_QUOTES</b></td>
2502
     *                         <td>Will convert both double and single quotes.</td>
2503
     *                         </tr>
2504
     *                         <tr valign="top">
2505
     *                         <td><b>ENT_NOQUOTES</b></td>
2506
     *                         <td>Will leave both double and single quotes unconverted.</td>
2507
     *                         </tr>
2508
     *                         <tr valign="top">
2509
     *                         <td><b>ENT_HTML401</b></td>
2510
     *                         <td>
2511
     *                         Handle code as HTML 4.01.
2512
     *                         </td>
2513
     *                         </tr>
2514
     *                         <tr valign="top">
2515
     *                         <td><b>ENT_XML1</b></td>
2516
     *                         <td>
2517
     *                         Handle code as XML 1.
2518
     *                         </td>
2519
     *                         </tr>
2520
     *                         <tr valign="top">
2521
     *                         <td><b>ENT_XHTML</b></td>
2522
     *                         <td>
2523
     *                         Handle code as XHTML.
2524
     *                         </td>
2525
     *                         </tr>
2526
     *                         <tr valign="top">
2527
     *                         <td><b>ENT_HTML5</b></td>
2528
     *                         <td>
2529
     *                         Handle code as HTML 5.
2530
     *                         </td>
2531
     *                         </tr>
2532
     *                         </table>
2533
     *                         </p>
2534
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2535
     *
2536
     * @return string the decoded string
2537
     */
2538 46
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Fast exit: strings shorter than four bytes or without "&" are returned untouched.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A convmap is a flat list of [start, end, offset, mask] groups, so its
        // length must be a multiple of four. The former fifth element was ignored
        // by PHP 7 and raises a ValueError on PHP 8.
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a pass changes nothing, so nested entities
        // (e.g. "&amp;lt;") are decoded completely.
        do {
            $str_compare = $str;

            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity($str, $convmap);
                } else {
                    $str = \mb_decode_numericentity($str, $convmap, $encoding);
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // Quote entities are kept as-is here.
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" to numeric entities that lack one
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2625
2626
    /**
2627
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2628
     *
2629
     * @param string $str
2630
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2631
     *
2632
     * @return string
2633
     */
2634 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles and substitute invalid byte sequences.
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2642
2643
    /**
2644
     * Remove empty html-tag.
2645
     *
2646
     * e.g.: <tag></tag>
2647
     *
2648
     * @param string $str
2649
     *
2650
     * @return string
2651
     */
2652 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag, optional whitespace, then a closing tag.
        $emptyTagPattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        return (string) $stripped;
    }
2660
2661
    /**
2662
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2663
     *
2664
     * @see http://php.net/manual/en/function.htmlentities.php
2665
     *
2666
     * @param string $str           <p>
2667
     *                              The input string.
2668
     *                              </p>
2669
     * @param int    $flags         [optional] <p>
2670
     *                              A bitmask of one or more of the following flags, which specify how to handle
2671
     *                              quotes, invalid code unit sequences and the used document type. The default is
2672
     *                              ENT_COMPAT | ENT_HTML401.
2673
     *                              <table>
2674
     *                              Available <i>flags</i> constants
2675
     *                              <tr valign="top">
2676
     *                              <td>Constant Name</td>
2677
     *                              <td>Description</td>
2678
     *                              </tr>
2679
     *                              <tr valign="top">
2680
     *                              <td><b>ENT_COMPAT</b></td>
2681
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2682
     *                              </tr>
2683
     *                              <tr valign="top">
2684
     *                              <td><b>ENT_QUOTES</b></td>
2685
     *                              <td>Will convert both double and single quotes.</td>
2686
     *                              </tr>
2687
     *                              <tr valign="top">
2688
     *                              <td><b>ENT_NOQUOTES</b></td>
2689
     *                              <td>Will leave both double and single quotes unconverted.</td>
2690
     *                              </tr>
2691
     *                              <tr valign="top">
2692
     *                              <td><b>ENT_IGNORE</b></td>
2693
     *                              <td>
2694
     *                              Silently discard invalid code unit sequences instead of returning
2695
     *                              an empty string. Using this flag is discouraged as it
2696
     *                              may have security implications.
2697
     *                              </td>
2698
     *                              </tr>
2699
     *                              <tr valign="top">
2700
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2701
     *                              <td>
2702
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2703
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2704
     *                              string.
2705
     *                              </td>
2706
     *                              </tr>
2707
     *                              <tr valign="top">
2708
     *                              <td><b>ENT_DISALLOWED</b></td>
2709
     *                              <td>
2710
     *                              Replace invalid code points for the given document type with a
2711
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2712
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2713
     *                              instance, to ensure the well-formedness of XML documents with
2714
     *                              embedded external content.
2715
     *                              </td>
2716
     *                              </tr>
2717
     *                              <tr valign="top">
2718
     *                              <td><b>ENT_HTML401</b></td>
2719
     *                              <td>
2720
     *                              Handle code as HTML 4.01.
2721
     *                              </td>
2722
     *                              </tr>
2723
     *                              <tr valign="top">
2724
     *                              <td><b>ENT_XML1</b></td>
2725
     *                              <td>
2726
     *                              Handle code as XML 1.
2727
     *                              </td>
2728
     *                              </tr>
2729
     *                              <tr valign="top">
2730
     *                              <td><b>ENT_XHTML</b></td>
2731
     *                              <td>
2732
     *                              Handle code as XHTML.
2733
     *                              </td>
2734
     *                              </tr>
2735
     *                              <tr valign="top">
2736
     *                              <td><b>ENT_HTML5</b></td>
2737
     *                              <td>
2738
     *                              Handle code as HTML 5.
2739
     *                              </td>
2740
     *                              </tr>
2741
     *                              </table>
2742
     *                              </p>
2743
     * @param string $encoding      [optional] <p>
2744
     *                              Like <b>htmlspecialchars</b>,
2745
     *                              <b>htmlentities</b> takes an optional third argument
2746
     *                              <i>encoding</i> which defines encoding used in
2747
     *                              conversion.
2748
     *                              Although this argument is technically optional, you are highly
2749
     *                              encouraged to specify the correct value for your code.
2750
     *                              </p>
2751
     * @param bool   $double_encode [optional] <p>
2752
     *                              When <i>double_encode</i> is turned off PHP will not
2753
     *                              encode existing html entities. The default is to convert everything.
2754
     *                              </p>
2755
     *
2756
     * @return string
2757
     *                <p>
2758
     *                The encoded string.
2759
     *                <br><br>
2760
     *                If the input <i>string</i> contains an invalid code unit
2761
     *                sequence within the given <i>encoding</i> an empty string
2762
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2763
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2764
     *                </p>
2765
     */
2766 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $isDefaultEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isDefaultEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Native htmlentities() leaves backslashes alone, so they are turned
        // into their numeric entity afterwards.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // Finally convert the remaining non-ASCII chars into numbered entities.
        return self::html_encode($encoded, true, $encoding);
    }
2790
2791
    /**
2792
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2793
     *
2794
     * INFO: Take a look at "UTF8::htmlentities()"
2795
     *
2796
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2797
     *
2798
     * @param string $str           <p>
2799
     *                              The string being converted.
2800
     *                              </p>
2801
     * @param int    $flags         [optional] <p>
2802
     *                              A bitmask of one or more of the following flags, which specify how to handle
2803
     *                              quotes, invalid code unit sequences and the used document type. The default is
2804
     *                              ENT_COMPAT | ENT_HTML401.
2805
     *                              <table>
2806
     *                              Available <i>flags</i> constants
2807
     *                              <tr valign="top">
2808
     *                              <td>Constant Name</td>
2809
     *                              <td>Description</td>
2810
     *                              </tr>
2811
     *                              <tr valign="top">
2812
     *                              <td><b>ENT_COMPAT</b></td>
2813
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2814
     *                              </tr>
2815
     *                              <tr valign="top">
2816
     *                              <td><b>ENT_QUOTES</b></td>
2817
     *                              <td>Will convert both double and single quotes.</td>
2818
     *                              </tr>
2819
     *                              <tr valign="top">
2820
     *                              <td><b>ENT_NOQUOTES</b></td>
2821
     *                              <td>Will leave both double and single quotes unconverted.</td>
2822
     *                              </tr>
2823
     *                              <tr valign="top">
2824
     *                              <td><b>ENT_IGNORE</b></td>
2825
     *                              <td>
2826
     *                              Silently discard invalid code unit sequences instead of returning
2827
     *                              an empty string. Using this flag is discouraged as it
2828
     *                              may have security implications.
2829
     *                              </td>
2830
     *                              </tr>
2831
     *                              <tr valign="top">
2832
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2833
     *                              <td>
2834
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2835
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2836
     *                              string.
2837
     *                              </td>
2838
     *                              </tr>
2839
     *                              <tr valign="top">
2840
     *                              <td><b>ENT_DISALLOWED</b></td>
2841
     *                              <td>
2842
     *                              Replace invalid code points for the given document type with a
2843
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2844
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2845
     *                              instance, to ensure the well-formedness of XML documents with
2846
     *                              embedded external content.
2847
     *                              </td>
2848
     *                              </tr>
2849
     *                              <tr valign="top">
2850
     *                              <td><b>ENT_HTML401</b></td>
2851
     *                              <td>
2852
     *                              Handle code as HTML 4.01.
2853
     *                              </td>
2854
     *                              </tr>
2855
     *                              <tr valign="top">
2856
     *                              <td><b>ENT_XML1</b></td>
2857
     *                              <td>
2858
     *                              Handle code as XML 1.
2859
     *                              </td>
2860
     *                              </tr>
2861
     *                              <tr valign="top">
2862
     *                              <td><b>ENT_XHTML</b></td>
2863
     *                              <td>
2864
     *                              Handle code as XHTML.
2865
     *                              </td>
2866
     *                              </tr>
2867
     *                              <tr valign="top">
2868
     *                              <td><b>ENT_HTML5</b></td>
2869
     *                              <td>
2870
     *                              Handle code as HTML 5.
2871
     *                              </td>
2872
     *                              </tr>
2873
     *                              </table>
2874
     *                              </p>
2875
     * @param string $encoding      [optional] <p>
2876
     *                              Defines encoding used in conversion.
2877
     *                              </p>
2878
     *                              <p>
2879
     *                              For the purposes of this function, the encodings
2880
     *                              ISO-8859-1, ISO-8859-15,
2881
     *                              UTF-8, cp866,
2882
     *                              cp1251, cp1252, and
2883
     *                              KOI8-R are effectively equivalent, provided the
2884
     *                              <i>string</i> itself is valid for the encoding, as
2885
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2886
     *                              the same positions in all of these encodings.
2887
     *                              </p>
2888
     * @param bool   $double_encode [optional] <p>
2889
     *                              When <i>double_encode</i> is turned off PHP will not
2890
     *                              encode existing html entities, the default is to convert everything.
2891
     *                              </p>
2892
     *
2893
     * @return string the converted string.
2894
     *                </p>
2895
     *                <p>
2896
     *                If the input <i>string</i> contains an invalid code unit
2897
     *                sequence within the given <i>encoding</i> an empty string
2898
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2899
     *                <b>ENT_SUBSTITUTE</b> flags are set
2900
     */
2901 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $isDefaultEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isDefaultEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Delegate the conversion to the native function.
        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2913
2914
    /**
2915
     * Checks whether iconv is available on the server.
2916
     *
2917
     * @return bool
2918
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2919
     */
2920
    public static function iconv_loaded(): bool
    {
        // Ask the engine whether the "iconv" extension is loaded.
        $extensionName = 'iconv';

        return \extension_loaded($extensionName);
    }
2924
2925
    /**
2926
     * alias for "UTF8::decimal_to_chr()"
2927
     *
2928
     * @param mixed $int
2929
     *
2930
     * @return string
2931
     *
2932
     * @see UTF8::decimal_to_chr()
2933
     */
2934 4
    public static function int_to_chr($int): string
    {
        // Alias: forwards the argument unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2938
2939
    /**
2940
     * Converts Integer to hexadecimal U+xxxx code point representation.
2941
     *
2942
     * INFO: opposite to UTF8::hex_to_int()
2943
     *
2944
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2945
     * @param string $pfix [optional]
2946
     *
2947
     * @return string the prefixed code point, zero-padded to at least four hex digits
2948
     */
2949 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; longer values stay untouched.
        $paddedHex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $paddedHex;
    }
2957
2958
    /**
2959
     * Checks whether intl-char is available on the server.
2960
     *
2961
     * @return bool
2962
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2963
     */
2964
    public static function intlChar_loaded(): bool
    {
        // The check is based on the existence of the "IntlChar" class.
        $className = 'IntlChar';

        return \class_exists($className);
    }
2968
2969
    /**
2970
     * Checks whether intl is available on the server.
2971
     *
2972
     * @return bool
2973
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2974
     */
2975 5
    public static function intl_loaded(): bool
    {
        // Ask the engine whether the "intl" extension is loaded.
        $extensionName = 'intl';

        return \extension_loaded($extensionName);
    }
2979
2980
    /**
2981
     * alias for "UTF8::is_ascii()"
2982
     *
2983
     * @param string $str
2984
     *
2985
     * @return bool
2986
     *
2987
     * @see UTF8::is_ascii()
2988
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2989
     */
2990 2
    public static function isAscii(string $str): bool
    {
        // Deprecated camelCase alias: forwards to is_ascii().
        $result = self::is_ascii($str);

        return $result;
    }
2994
2995
    /**
2996
     * alias for "UTF8::is_base64()"
2997
     *
2998
     * @param string $str
2999
     *
3000
     * @return bool
3001
     *
3002
     * @see UTF8::is_base64()
3003
     * @deprecated <p>use "UTF8::is_base64()"</p>
3004
     */
3005 2
    public static function isBase64($str): bool
    {
        // Deprecated camelCase alias: forwards to is_base64().
        $result = self::is_base64($str);

        return $result;
    }
3009
3010
    /**
3011
     * alias for "UTF8::is_binary()"
3012
     *
3013
     * @param mixed $str
3014
     * @param bool  $strict
3015
     *
3016
     * @return bool
3017
     *
3018
     * @see UTF8::is_binary()
3019
     * @deprecated <p>use "UTF8::is_binary()"</p>
3020
     */
3021 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated camelCase alias: forwards both arguments to is_binary().
        $result = self::is_binary($str, $strict);

        return $result;
    }
3025
3026
    /**
3027
     * alias for "UTF8::is_bom()"
3028
     *
3029
     * @param string $utf8_chr
3030
     *
3031
     * @return bool
3032
     *
3033
     * @see UTF8::is_bom()
3034
     * @deprecated <p>use "UTF8::is_bom()"</p>
3035
     */
3036 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated camelCase alias: forwards to is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
3040
3041
    /**
3042
     * alias for "UTF8::is_html()"
3043
     *
3044
     * @param string $str
3045
     *
3046
     * @return bool
3047
     *
3048
     * @see UTF8::is_html()
3049
     * @deprecated <p>use "UTF8::is_html()"</p>
3050
     */
3051 2
    public static function isHtml(string $str): bool
    {
        // Deprecated camelCase alias: forwards to is_html().
        $result = self::is_html($str);

        return $result;
    }
3055
3056
    /**
3057
     * alias for "UTF8::is_json()"
3058
     *
3059
     * @param string $str
3060
     *
3061
     * @return bool
3062
     *
3063
     * @see UTF8::is_json()
3064
     * @deprecated <p>use "UTF8::is_json()"</p>
3065
     */
3066
    public static function isJson(string $str): bool
    {
        // Deprecated camelCase alias: forwards to is_json().
        $result = self::is_json($str);

        return $result;
    }
3070
3071
    /**
3072
     * alias for "UTF8::is_utf16()"
3073
     *
3074
     * @param mixed $str
3075
     *
3076
     * @return false|int
3077
     *                   <strong>false</strong> if it's not UTF-16,<br>
3078
     *                   <strong>1</strong> for UTF-16LE,<br>
3079
     *                   <strong>2</strong> for UTF-16BE
3080
     *
3081
     * @see UTF8::is_utf16()
3082
     * @deprecated <p>use "UTF8::is_utf16()"</p>
3083
     */
3084 2
    public static function isUtf16($str)
    {
        // Deprecated camelCase alias: forwards to is_utf16() and hands back its result as-is.
        $result = self::is_utf16($str);

        return $result;
    }
3088
3089
    /**
3090
     * alias for "UTF8::is_utf32()"
3091
     *
3092
     * @param mixed $str
3093
     *
3094
     * @return false|int
3095
     *                   <strong>false</strong> if it's not UTF-32,
3096
     *                   <strong>1</strong> for UTF-32LE,
3097
     *                   <strong>2</strong> for UTF-32BE
3098
     *
3099
     * @see UTF8::is_utf32()
3100
     * @deprecated <p>use "UTF8::is_utf32()"</p>
3101
     */
3102 2
    public static function isUtf32($str)
    {
        // Deprecated camelCase alias: forwards to is_utf32() and hands back its result as-is.
        $result = self::is_utf32($str);

        return $result;
    }
3106
3107
    /**
3108
     * alias for "UTF8::is_utf8()"
3109
     *
3110
     * @param string $str
3111
     * @param bool   $strict
3112
     *
3113
     * @return bool
3114
     *
3115
     * @see UTF8::is_utf8()
3116
     * @deprecated <p>use "UTF8::is_utf8()"</p>
3117
     */
3118 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated camelCase alias: forwards both arguments to is_utf8().
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3122
3123
    /**
3124
     * Returns true if the string contains only alphabetic chars, false otherwise.
3125
     *
3126
     * @param string $str
3127
     *
3128
     * @return bool
3129
     *              Whether or not $str contains only alphabetic chars
3130
     */
3131 10
    public static function is_alpha(string $str): bool
    {
        // Zero or more alphabetic chars, nothing else.
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring the generic pattern helper is used instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3140
3141
    /**
3142
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3143
     *
3144
     * @param string $str
3145
     *
3146
     * @return bool
3147
     *              Whether or not $str contains only alphanumeric chars
3148
     */
3149 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphanumeric chars, nothing else.
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring the generic pattern helper is used instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3158
3159
    /**
3160
     * Checks if a string is 7 bit ASCII.
3161
     *
3162
     * @param string $str <p>The string to check.</p>
3163
     *
3164
     * @return bool
3165
     *              <strong>true</strong> if it is ASCII<br>
3166
     *              <strong>false</strong> otherwise
3167
     */
3168 137
    public static function is_ascii(string $str): bool
    {
        // The empty string counts as ASCII. Otherwise the string is ASCII when no
        // byte falls outside the allowed set: TAB, LF, CR, \x10, \x13 and the
        // printable range \x20-\x7E.
        // NOTE(review): \x10 / \x13 are DLE / DC3 - possibly decimal 10 / 13 (LF / CR)
        // were meant, which \x0A / \x0D already cover; confirm before changing.
        return $str === ''
            || !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3176
3177
    /**
3178
     * Returns true if the string is base64 encoded, false otherwise.
3179
     *
3180
     * @param mixed|string $str                <p>The input string.</p>
3181
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
3182
     *
3183
     * @return bool whether or not $str is base64 encoded
3184
     */
3185 16
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        // Only strings can be base64.
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_string($str) === false) {
            return false;
        }

        // The empty string is only accepted when the caller explicitly allows it.
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        // Strict decode, then require that re-encoding reproduces the input exactly.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }
3202
3203
    /**
     * Heuristically checks whether the input looks like binary data.
     *
     * The input is cast to string and reported as binary when any of these hold:
     * - it consists only of the characters "0" and "1",
     * - self::get_file_type() reports the type "binary",
     * - more than 25% of its bytes are NUL bytes,
     * - in strict mode: finfo reports the MIME encoding "binary".
     *
     * @param mixed $input
     * @param bool  $strict <p>Also ask ext-fileinfo for the MIME encoding.</p>
     *
     * @throws \RuntimeException if $strict is true and ext-fileinfo is flagged as missing
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $bytes = (string) $input;
        if ($bytes === '') {
            return false;
        }

        // A string made only of "0" / "1" chars is treated as binary notation.
        if (\preg_match('~^[01]+$~', $bytes) === 1) {
            return true;
        }

        $fileType = self::get_file_type($bytes);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // Share of NUL bytes in the whole input.
        $byteCount = \strlen($bytes);
        $nullRatio = \substr_count($bytes, "\x0", 0, $byteCount) / $byteCount;
        if ($nullRatio > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($bytes) === 'binary';
    }
3247
3248
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed to
     * self::is_binary() in strict mode.
     *
     * @param string $file <p>Path passed to fopen().</p>
     *
     * @return bool
     *              <strong>false</strong> if the file could not be opened or the read block is empty
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3272
3273
    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        // Anchored POSIX class: whitespace chars only, from start to end.
        $pattern = '^[[:space:]]*$';

        // Without mbstring, defer to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3290
3291
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is identical to one of the keys of self::$BOM,
     *              <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM table is keyed by the BOM byte sequence. A strict lookup on the
        // keys replaces the former by-reference loop, which bound a reference
        // into the static table for a value that was never used.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3312
3313
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it equals FALSE when converted to bool
     * (e.g. "", "0", 0, null, false or an empty array).
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // For an existing variable, empty($x) is the same as !$x.
        return !$str;
    }
3327
3328
    /**
     * Returns true if the string contains only hexadecimal chars, false otherwise.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        // Anchored POSIX class: hex digits only, from start to end.
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring, defer to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3345
3346
    /**
     * Check if the string contains any html-tags, e.g. "<lall>".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if an opening, closing or self-closing tag was found
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/ — the pattern below runs on the encoded string
        $encoded = self::emoji_encode($str);

        $tagMatch = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $tagMatch);

        return $tagMatch !== [];
    }
3368
3369
    /**
     * Try to check if "$str" is a json-string.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only acceptable when the input was the literal "null".
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        $isContainer = \is_array($decoded) || \is_object($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isContainer) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3405
3406
    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        // Anchored POSIX class: lower case chars only, from start to end.
        $pattern = '^[[:lower:]]*$';

        // Without mbstring, defer to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3420
3421
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by actually unserializing the string. Object
     * instantiation is disabled ("allowed_classes" => false), so probing an
     * untrusted string cannot create instances of application classes.
     *
     * @param string $str
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // A serialized "false" cannot be told apart from the failure result of
        // unserialize(), so it is recognized by its literal form.
        if ($str === 'b:0;') {
            return true;
        }

        // The silence operator hides the notice that unserialize() raises for
        // input that is not serialized data.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3440
3441
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        // Anchored POSIX class: upper case chars only, from start to end.
        $pattern = '^[[:upper:]]*$';

        // Without mbstring, defer to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3459
3460
    /**
     * Check if the string is UTF-16.
     *
     * For each byte order the input is converted to UTF-8, back to UTF-16 and
     * to UTF-8 again. If that round-trip is stable, a score is built from the
     * chars of the converted string; the byte order with the higher score wins.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if self::is_binary() (strict) says no.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16 (or both byte orders score the same),<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$decoded) {
                continue;
            }

            // Only a stable round-trip counts as a hint for this byte order.
            $roundTrip = \mb_convert_encoding(
                \mb_convert_encoding($decoded, $candidate, 'UTF-8'),
                'UTF-8',
                $candidate
            );
            if ($roundTrip !== $decoded) {
                continue;
            }

            // Computed lazily and shared between both byte orders.
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so iterate them by value; the old loop
            // bound a reference to the function result and never used it.
            // NOTE(review): in_array() searches the *values* of $strChars —
            // confirm against count_chars() that this is the intended lookup.
            foreach (\array_keys(self::count_chars($roundTrip)) as $char) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$score[$candidate];
                }
            }
        }

        if ($score['UTF-16LE'] === $score['UTF-16BE']) {
            return false;
        }

        return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
    }
3537
3538
    /**
     * Check if the string is UTF-32.
     *
     * For each byte order the input is converted to UTF-8, back to UTF-32 and
     * to UTF-8 again. If that round-trip is stable, a score is built from the
     * chars of the converted string; the byte order with the higher score wins.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if self::is_binary() (strict) says no.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32 (or both byte orders score the same),<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$decoded) {
                continue;
            }

            // Only a stable round-trip counts as a hint for this byte order.
            $roundTrip = \mb_convert_encoding(
                \mb_convert_encoding($decoded, $candidate, 'UTF-8'),
                'UTF-8',
                $candidate
            );
            if ($roundTrip !== $decoded) {
                continue;
            }

            // Computed lazily and shared between both byte orders.
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so iterate them by value; the old loop
            // bound a reference to the function result and never used it.
            // NOTE(review): in_array() searches the *values* of $strChars —
            // confirm against count_chars() that this is the intended lookup.
            foreach (\array_keys(self::count_chars($roundTrip)) as $char) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$score[$candidate];
                }
            }
        }

        if ($score['UTF-32LE'] === $score['UTF-32BE']) {
            return false;
        }

        return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
    }
3615
3616
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * Arrays are checked element by element; the result is true only if every
     * element passes. An empty string is valid. A string that ends in the
     * middle of a multi-octet sequence is invalid.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Elements are only read, so iterate by value (no dangling reference).
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // From here on the input is handled byte by byte, so make sure it is a string.
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): the regex shortcut runs only when pcre_utf8_support()
        // does NOT return true — confirm this condition is not inverted.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state means the input ended in the middle of a multi-octet
        // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
        return $mState === 0;
    }
3771
3772
    /**
     * Decodes a JSON string.
     *
     * The input is passed through self::filter() first and then handed to the
     * native json_decode() together with the remaining arguments.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, forwarded unchanged
     *                        to the native function.
     *                        </p>
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3822
3823
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through self::filter() first and then handed to the
     * native json_encode() together with the remaining arguments.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of <b>JSON_*</b> encode constants, forwarded
     *                       unchanged to the native function.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3872
3873
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is detected by the presence of the json_decode() function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3883
3884
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without a language and without the keep-length option the
     * plain mb_strtolower() is used on the first char; every other combination
     * goes through self::strtolower().
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $canUseMbDirectly = $lang === null && $tryToKeepStringLength === false;

        if ($encoding !== 'UTF-8') {
            // Any other charset: normalize its name and use the class' own helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        if ($canUseMbDirectly === true) {
            $head = \mb_strtolower($firstChar);
        } else {
            $head = self::strtolower(
                $firstChar,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $head . $tail;
    }
3940
3941
    /**
     * Alias of "UTF8::lcfirst()": every argument is forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
3963
3964
    /**
     * Lowercases the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()", each non-empty piece that is
     * not listed in $exceptions is passed to "UTF8::lcfirst()", and the pieces
     * are glued back together without a separator.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // compare against '' explicitly: a truthiness check would also discard the valid input "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // write back by key instead of iterating by reference, so no dangling reference is left behind
        foreach ($words as $index => $word) {
            if ($word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
4010
4011
    /**
     * Alias of "UTF8::lcfirst()": every argument is forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
4033
4034
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string
     *                           means "strip whitespace"</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check, so that "0" is honoured
        // as a character to strip. An empty list still falls back to whitespace,
        // because an empty character class would be an invalid pattern.
        if ($chars !== null && $chars !== '') {
            $pattern = '\A[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '\A[\\s]+';
        }

        // "\A" anchors at the very start of the subject in both regex engines used below;
        // "^" can additionally match after a line break in the Ruby syntax used by "mb_ereg_replace".
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4062
4063
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codePointList = self::codepoints($input, false);

        return \count($codePointList) === 0
            ? null
            : self::chr(\max($codePointList));
    }
4085
4086
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest entry of "UTF8::chr_size_list()", or 0 when that list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $byteSizes = self::chr_size_list($str);

        return \count($byteSizes) > 0 ? (int) \max($byteSizes) : 0;
    }
4103
4104
    /**
     * Reports whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $extensionName = 'mbstring';

        return \extension_loaded($extensionName);
    }
4114
4115
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code points were found
     */
    public static function min($arg)
    {
        if (\is_array($arg) === true) {
            $arg = \implode('', $arg);
        }

        $codePointList = self::codepoints($arg, false);

        // nothing to compare
        if (\count($codePointList) === 0) {
            return null;
        }

        return self::chr(\min($codePointList));
    }
4137
4138
    /**
     * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4153
4154
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: a few hard-coded fast paths, the per-request result cache, the
     * list of known encodings, and finally an alias table that is keyed by the
     * upper-cased name with every non-alphanumeric character removed.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // an empty name, as well as "0" / "1" (booleans cast to string by
        // non "strict_types" callers), resolve to the fallback
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // the list of known encodings is loaded on first use only
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $requestedName = $encoding;
        $upperName = \strtoupper($encoding);
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upperName);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown names are returned upper-cased; the result is cached under the name as it was passed in
        $normalized = $aliases[$aliasKey] ?? $upperName;

        $STATIC_NORMALIZE_ENCODING_CACHE[$requestedName] = $normalized;

        return $normalized;
    }
4297
4298
    /**
     * Standardize line ending to unix-like.
     *
     * Every "\r\n" pair and every remaining lone "\r" becomes a single "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // strtr() tries the longest key first, so "\r\n" collapses to one "\n"
        $lineEndings = [
            "\r\n" => "\n",
            "\r"   => "\n",
        ];

        return \strtr($str, $lineEndings);
    }
4309
4310
    /**
     * Normalize some MS Word special characters.
     *
     * Typographic quotes, guillemets, dashes and the ellipsis are replaced by
     * their plain ASCII look-alikes.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \strtr($str, $replacements);
    }
4361
4362
    /**
     * Normalize the whitespace.
     *
     * Every value of the class' whitespace table is replaced by a plain space; unless
     * asked otherwise, the values of the bidirectional-control table are removed first.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // search lists are built once and reused; the whitespace list has one
        // slot per $keepNonBreakingSpace value
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        $slot = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$slot])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$slot] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$slot], ' ', $str);
    }
4403
4404
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Results are memoized per character/encoding pair. The lookup order is: the
     * "ord" data table, "IntlChar::ord()" (when supported) and finally a manual
     * decoding of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];

            return $CHAR_CACHE[$cacheKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                $CHAR_CACHE[$cacheKey] = $intlCode;

                return $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (up to four) byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $leadByte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        if ($leadByte >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $codePoint = (int) ((($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($leadByte >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $codePoint = (int) ((($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($leadByte >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $codePoint = (int) ((($leadByte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode at all)
            $codePoint = $leadByte;
        }

        $CHAR_CACHE[$cacheKey] = $codePoint;

        return $codePoint;
    }
4489
4490
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method never (re-)places variables in the current scope;
     *          the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // without mbstring: use the native parser and judge success by the result only
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4522
4523
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty subject.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchCount = @\preg_match('//u', '');

        // 1 => the pattern compiled and matched; 0 / false => no usable support
        return $matchCount === 1;
    }
4535
4536
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both bounds are resolved to code points: as decimal digits or as hexadecimal
     * digits (both only with $use_ctype), as a numeric value (only without
     * $use_ctype), or otherwise via "UTF8::ord()". A falsy bound (e.g. 0, "0", "")
     * or a bound that resolves to 0 yields an empty array.
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is not available
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions are only fed strings: an integer argument would be
        // interpreted as an ASCII code (and is deprecated since PHP 8.1), which made
        // the hexadecimal check inconsistent with the decimal one.
        $var1AsString = (string) $var1;
        $var2AsString = (string) $var2;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit($var1AsString) && \ctype_digit($var2AsString)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit($var1AsString) && \ctype_xdigit($var2AsString)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1AsString);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // the end bound is resolved the same way the start bound was
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2AsString);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4618
4619
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // only strings containing one of these markers go through the decode loop
        $needsDecoding = \strpos($str, '&') !== false
                         ||
                         \strpos($str, '%') !== false
                         ||
                         \strpos($str, '+') !== false
                         ||
                         \strpos($str, '\u') !== false;

        if (!$needsDecoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // repeat until a pass changes nothing (or stop after one pass without $multi_decode)
        do {
            $previousPass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entityDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entityDecoded));
        } while ($multi_decode === true && $previousPass !== $str);

        return $str;
    }
4676
4677
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and always gets the "u" modifier,
     * followed by the given options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter the the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is passed on as "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        if ($delimiter === '' || $delimiter === '0') {
            $delimiter = '/';
        }

        $regex = $delimiter . $pattern . $delimiter . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4710
4711
    /**
     * Alias of "UTF8::remove_bom()": the argument is forwarded unchanged.
     *
     * @param string $str
     *
     * @return string
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4725
4726
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the class' BOM table is tried in table order; an entry the
     * string starts with is cut off before the next entry is tested.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = \strlen($str);

        foreach (self::$BOM as $bomString => $bomByteLength) {
            // only a BOM at the very beginning is of interest
            if (\strpos($str, $bomString, 0) !== 0) {
                continue;
            }

            $withoutBom = \substr($str, $bomByteLength, $remainingBytes);
            if ($withoutBom === false) {
                return '';
            }

            $remainingBytes -= (int) $bomByteLength;
            $str = (string) $withoutBom;
        }

        return $str;
    }
4755
4756
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "   " for $what = " ") is
     * collapsed into one single occurrence. A value of $what that is neither
     * a string nor an array leaves $str untouched.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        if (\is_array($what) === false) {
            return $str;
        }

        foreach ($what as $item) {
            $item = (string) $item;
            if ($item === '') {
                // an empty needle has nothing that could be collapsed
                continue;
            }

            // Replace with the captured group instead of the raw needle:
            // a needle such as "$0" or "\1" would otherwise be interpreted
            // as a back-reference inside the replacement string.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
4779
4780
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, passed through to
     *                              "strip_tags()". Default: ''</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $stripped = \strip_tags($str, $allowableTags);

        return $stripped;
    }
4794
4795
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * "\r\n" is matched as one unit, so a Windows line break is replaced
     * by $replacement once and not once per byte.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // NOTE: the former pattern started with "/\r\n", i.e. it required a literal
        // slash in front of the CRLF, so a plain CRLF was handled as "\r" plus "\n".
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4807
4808
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * The replacement is repeated until a pass changes nothing, because removing
     * one sequence can join its neighbours into a new one (e.g. "%%0000").
     *
     * @param string $str
     * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00", "%1F", "%7F", ...).</p>
     * @param string $replacement <p>Inserted in place of every removed sequence.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            $non_displayables[] = '/%0[0-8bcefBCEF]/'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-fA-F]/'; // url encoded 16-31
            $non_displayables[] = '/%7[fF]/'; // url encoded 127, kept in sync with \x7F below
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4841
4842
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix test is a byte-wise comparison; only the cut itself is done
     * with the multibyte functions of the given encoding.
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare with '' explicitly: a truthiness check treats the prefix "0" as "no prefix".
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4873
4874
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix test is a byte-wise comparison; only the cut itself is done
     * with the multibyte functions of the given encoding.
     *
     * @param string $str
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare with '' explicitly: a truthiness check treats the suffix "0" as "no suffix".
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4906
4907
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * The case-sensitive variant uses "str_replace()", the case-insensitive
     * one is handed over to "UTF8::str_ireplace()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4929
4930
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * The case-sensitive variant uses "str_replace()", the case-insensitive
     * one is handed over to "UTF8::str_ireplace()".
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4952
4953
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * With $processInvalidUtf8 enabled the string is first run through
     * "mb_convert_encoding()" (UTF-8 to UTF-8); for that call the mbstring
     * substitute character is switched to the replacement ("none" for an
     * empty replacement) and restored afterwards.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            $substituteChar = $replacementChar === '' ? 'none' : $replacementChar;

            $previousSubstitute = \mb_substitute_character();
            \mb_substitute_character($substituteChar);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previousSubstitute);
        }

        $diamonds = [
            "\xEF\xBF\xBD",
            '�',
        ];

        return \str_replace($diamonds, [$replacementChar, $replacementChar], $str);
    }
4999
5000
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * Without a character list (null or '') trailing whitespace is removed;
     * otherwise every trailing character that is part of $chars is removed.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Test for null / '' explicitly: the character list "0" is falsy in PHP
        // and would otherwise silently fall back to whitespace trimming.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
5028
5029
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" row per entry of self::$SUPPORT, wrapped in a "<pre>" element.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        $rows = '';
        foreach (self::$SUPPORT as $key => $value) {
            $rows .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        echo '<pre>' . $rows . '</pre>';
    }
5043
5044
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * An empty input yields an empty string. The number inside the entity is
     * whatever "UTF8::ord()" returns for the character.
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        $keepAsIs = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($keepAsIs) {
            return $char;
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
5069
5070
    /**
     * Replaces every run of $tabLength spaces by one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5088
5089
    /**
     * alias for "UTF8::str_split()"
     *
     * All arguments are passed on unchanged.
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
5107
5108
    /**
     * alias for "UTF8::str_starts_with()"
     *
     * Both arguments are passed on unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $startsWith = self::str_starts_with($haystack, $needle);

        return $startsWith;
    }
5122
5123
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * Steps: trim + lower-case the first character, drop leading dashes /
     * underscores, upper-case the character behind every run of dashes,
     * underscores or whitespace (dropping the run), then upper-case every
     * run of digits together with the character that follows it.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // The plain "mb_strtoupper()" is only used when no language specific
        // handling and no length preservation was requested.
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        /**
         * Upper-cases $text with the strategy selected above.
         *
         * @param string $text
         * @param bool   $clean forwarded as "clean utf8" flag to UTF8::strtoupper()
         *
         * @return string
         */
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                // a separator run at the very end has no following character
                return isset($match[1]) ? $toUpper($match[1], false) : '';
            },
            $str
        );

        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5208
5209
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first; the capitalization helper then runs
     * once with " " and once with "-" as word separator.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $spaceSeparated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($spaceSeparated, '-');
    }
5227
5228
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * The case-sensitive search uses "strpos()", the insensitive one "mb_stripos()".
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5250
5251
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty haystack, an empty needle list or an empty needle always
     * results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains all $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare with '' so that the needle "0" is still searched for
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // Only a miss ends the loop early; a hit must not return, otherwise
            // every needle after the first one would be ignored.
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5286
5287
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped; an empty haystack or an empty needle list
     * always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare with '' so that the needle "0" is still searched for
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5329
5330
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "UTF8::str_delimit()" with "-" as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5344
5345
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * The "mb_ereg_replace()" functions are used when mbstring is available,
     * "preg_replace()" otherwise; the steps in between are the same.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) mark every uppercase letter inside of a word with a leading dash
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lower-case the string
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;
        if ($useMbFunction === true && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // 3.) turn every run of dashes, underscores and whitespace into the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5396
5397
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-32 / UTF-16), ASCII,
     * UTF-8, "mb_detect_encoding()" and finally an "iconv()" round trip over
     * the encodings loaded via self::getData('encodings').
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $utf32 = self::is_utf32($str, false);
            if ($utf32 === 1 || $utf32 === 2) {
                return $utf32 === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16 = self::is_utf16($str, false);
            if ($utf16 === 1 || $utf16 === 2) {
                return $utf16 === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $encodingTmp) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $encodingTmp;
            }
        }

        return false;
    }
5518
5519
    /**
     * alias for "UTF8::str_ends_with()"
     *
     * Both arguments are passed on unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $endsWith = self::str_ends_with($haystack, $needle);

        return $endsWith;
    }
5533
5534
    /**
     * Check if the string ends with the given substring.
     *
     * An empty needle always counts as a match, also for an empty haystack.
     * The comparison is byte-wise and case-sensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        $tail = $haystack === '' ? null : \substr($haystack, -\strlen($needle));

        return $tail === $needle;
    }
5554
5555
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty list never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with $substring
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5579
5580
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * The prefix check is byte-wise and case-sensitive.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $hasPrefix = $substring !== '' && \strpos($str, $substring) === 0;

        return $hasPrefix ? $str : $substring . $str;
    }
5601
5602
    /**
5603
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
5604
     *
5605
     * @param string $str       <p>The input string.</p>
5606
     * @param string $substring <p>The substring to add if not present.</p>
5607
     *
5608
     * @return string
5609
     */
5610
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Appending an empty substring is a no-op.
        if ($substring === '') {
            return $str;
        }

        // Already terminated by the substring: nothing to add.
        if ($str !== '' && \substr($str, -\strlen($substring)) === $substring) {
            return $str;
        }

        return $str . $substring;
    }
5624
5625
    /**
5626
     * Capitalizes the first word of the string, replaces underscores with
5627
     * spaces, and strips '_id'.
5628
     *
5629
     * @param string $str
5630
     *
5631
     * @return string
5632
     */
5633
    public static function str_humanize($str): string
    {
        // Step 1: drop every "_id" marker (anywhere in the input).
        $withoutIdMarker = \str_replace('_id', '', $str);

        // Step 2: the remaining underscores become word separators.
        $spaced = \str_replace('_', ' ', $withoutIdMarker);

        // Step 3: trim the edges and upper-case the first character.
        return self::ucfirst(\trim($spaced));
    }
5649
5650
    /**
5651
     * alias for "UTF8::str_istarts_with()"
5652
     *
5653
     * @param string $haystack
5654
     * @param string $needle
5655
     *
5656
     * @return bool
5657
     *
5658
     * @see UTF8::str_istarts_with()
5659
     */
5660
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Pure alias: the work is done by str_istarts_with().
        $startsWith = self::str_istarts_with($haystack, $needle);

        return $startsWith;
    }
5664
5665
    /**
5666
     * alias for "UTF8::str_iends_with()"
5667
     *
5668
     * @param string $haystack
5669
     * @param string $needle
5670
     *
5671
     * @return bool
5672
     *
5673
     * @see UTF8::str_iends_with()
5674
     */
5675
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Pure alias: the work is done by str_iends_with().
        $endsWith = self::str_iends_with($haystack, $needle);

        return $endsWith;
    }
5679
5680
    /**
5681
     * Check if the string ends with the given substring, case insensitive.
5682
     *
5683
     * @param string $haystack <p>The string to search in.</p>
5684
     * @param string $needle   <p>The substring to search for.</p>
5685
     *
5686
     * @return bool
5687
     */
5688
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // An empty needle always matches; otherwise an empty haystack never does.
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        // Take as many trailing bytes as the needle has and compare them
        // case-insensitively with the needle.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5700
5701
    /**
5702
     * Returns true if the string ends with any of $substrings, false otherwise.
5703
     *
5704
     * - case-insensitive
5705
     *
5706
     * @param string   $str        <p>The input string.</p>
5707
     * @param string[] $substrings <p>Substrings to look for.</p>
5708
     *
5709
     * @return bool whether or not $str ends with $substring
5710
     */
5711
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // The first candidate that is a case-insensitive suffix wins; an
        // empty candidate list never enters the loop.
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5725
5726
    /**
5727
     * Returns the index of the first occurrence of $needle in the string,
5728
     * and false if not found. Accepts an optional offset from which to begin
5729
     * the search.
5730
     *
5731
     * @param string $str      <p>The input string.</p>
5732
     * @param string $needle   <p>Substring to look for.</p>
5733
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5734
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5735
     *
5736
     * @return false|int
5737
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5738
     */
5739
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Thin wrapper: the result of stripos() is handed back untouched.
        $position = self::stripos($str, $needle, $offset, $encoding);

        return $position;
    }
5752
5753
    /**
5754
     * Returns the index of the last occurrence of $needle in the string,
5755
     * and false if not found. Accepts an optional offset from which to begin
5756
     * the search. Offsets may be negative to count from the last character
5757
     * in the string.
5758
     *
5759
     * @param string $str      <p>The input string.</p>
5760
     * @param string $needle   <p>Substring to look for.</p>
5761
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5762
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5763
     *
5764
     * @return false|int
5765
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5766
     */
5767
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Thin wrapper: the result of strripos() is handed back untouched.
        $position = self::strripos($str, $needle, $offset, $encoding);

        return $position;
    }
5780
5781
    /**
5782
     * Returns the index of the first occurrence of $needle in the string,
5783
     * and false if not found. Accepts an optional offset from which to begin
5784
     * the search.
5785
     *
5786
     * @param string $str      <p>The input string.</p>
5787
     * @param string $needle   <p>Substring to look for.</p>
5788
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5789
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5790
     *
5791
     * @return false|int
5792
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5793
     */
5794
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Thin wrapper: the result of strpos() is handed back untouched.
        $position = self::strpos($str, $needle, $offset, $encoding);

        return $position;
    }
5807
5808
    /**
5809
     * Returns the index of the last occurrence of $needle in the string,
5810
     * and false if not found. Accepts an optional offset from which to begin
5811
     * the search. Offsets may be negative to count from the last character
5812
     * in the string.
5813
     *
5814
     * @param string $str      <p>The input string.</p>
5815
     * @param string $needle   <p>Substring to look for.</p>
5816
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5817
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5818
     *
5819
     * @return false|int
5820
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5821
     */
5822
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Thin wrapper: the result of strrpos() is handed back untouched.
        $position = self::strrpos($str, $needle, $offset, $encoding);

        return $position;
    }
5835
5836
    /**
5837
     * Inserts $substring into the string at the $index provided.
5838
     *
5839
     * @param string $str       <p>The input string.</p>
5840
     * @param string $substring <p>String to be inserted.</p>
5841
     * @param int    $index     <p>The index at which to insert the substring.</p>
5842
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5843
     *
5844
     * @return string
5845
     */
5846
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Other encodings go through the class' own helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                // Index past the end: the input is returned unchanged.
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8 path: call mbstring directly.
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            // Index past the end: the input is returned unchanged.
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5875
5876
    /**
5877
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
5878
     *
5879
     * @see http://php.net/manual/en/function.str-ireplace.php
5880
     *
5881
     * @param mixed $search  <p>
5882
     *                       Every replacement with search array is
5883
     *                       performed on the result of previous replacement.
5884
     *                       </p>
5885
     * @param mixed $replace <p>
5886
     *                       </p>
5887
     * @param mixed $subject <p>
5888
     *                       If subject is an array, then the search and
5889
     *                       replace is performed with every entry of
5890
     *                       subject, and the return value is an array as
5891
     *                       well.
5892
     *                       </p>
5893
     * @param int   $count   [optional] <p>
5894
     *                       The number of matched and replaced needles will
5895
     *                       be returned in count which is passed by
5896
     *                       reference.
5897
     *                       </p>
5898
     *
5899
     * @return mixed a string or an array of replacements
5900
     */
5901
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Turn every needle into a case-insensitive ("i"), UTF-8 aware ("u")
        // regular expression. preg_replace() pairs patterns and replacements
        // by position, so the original array keys are not needed.
        $patterns = [];
        foreach ((array) $search as $needle) {
            $needle = (string) $needle;
            $patterns[] = $needle === ''
                ? '/^(?<=.)$/' // a pattern that can never match: empty needles replace nothing
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() interprets "$n", "${n}" and "\n" inside the
        // replacement as back-references. This method mirrors str_ireplace(),
        // where replacements are literal text, so "$" and "\" are escaped.
        $escape = static function ($replacement): string {
            return \addcslashes((string) $replacement, '\\$');
        };
        $replacements = \is_array($replace)
            ? \array_map($escape, $replace)
            : $escape($replace);

        // $count receives the number of replacements performed.
        return \preg_replace($patterns, $replacements, $subject, -1, $count);
    }
5920
5921
    /**
5922
     * Replaces $search from the beginning of string with $replacement.
5923
     *
5924
     * @param string $str         <p>The input string.</p>
5925
     * @param string $search      <p>The string to search for.</p>
5926
     * @param string $replacement <p>The replacement.</p>
5927
     *
5928
     * @return string string after the replacements
5929
     */
5930
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // With nothing to search for, the replacement is appended to the
        // input (for an empty input this is just the replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        // An empty input cannot start with a non-empty search string.
        if ($str === '') {
            return '';
        }

        // Only a match at byte offset 0 counts as "beginning".
        $matchesAtStart = \stripos($str, $search) === 0;
        if (!$matchesAtStart) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5952
5953
    /**
5954
     * Replaces $search from the ending of string with $replacement.
5955
     *
5956
     * @param string $str         <p>The input string.</p>
5957
     * @param string $search      <p>The string to search for.</p>
5958
     * @param string $replacement <p>The replacement.</p>
5959
     *
5960
     * @return string string after the replacements
5961
     */
5962
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // With nothing to search for, the replacement is appended to the
        // input (for an empty input this is just the replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte offset at which a trailing match would have to start.
        $offset = \strlen($str) - \strlen($search);

        // The search string is longer than the input, so it cannot be a
        // suffix. Returning here also keeps a negative offset away from
        // stripos(), which rejects offsets outside the haystack (warning on
        // PHP 7.1+, ValueError on PHP 8).
        if ($offset < 0) {
            return $str;
        }

        // Starting at $offset leaves exactly strlen($search) bytes, so any
        // hit is a match at the very end of the string.
        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5984
5985
    /**
5986
     * Check if the string starts with the given substring, case insensitive.
5987
     *
5988
     * @param string $haystack <p>The string to search in.</p>
5989
     * @param string $needle   <p>The substring to search for.</p>
5990
     *
5991
     * @return bool
5992
     */
5993
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // An empty needle always matches; otherwise an empty haystack never does.
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        // A case-insensitive hit at index 0 means the haystack starts with the needle.
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6005
6006
    /**
6007
     * Returns true if the string begins with any of $substrings, false otherwise.
6008
     *
6009
     * - case-insensitive
6010
     *
6011
     * @param string $str        <p>The input string.</p>
6012
     * @param array  $substrings <p>Substrings to look for.</p>
6013
     *
6014
     * @return bool whether or not $str starts with $substring
6015
     */
6016
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // Nothing can match when either side is empty.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // The first candidate that is a case-insensitive prefix wins.
        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6034
6035
    /**
6036
     * Gets the substring after the first occurrence of a separator.
6037
     *
6038
     * @param string $str       <p>The input string.</p>
6039
     * @param string $separator <p>The string separator.</p>
6040
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6041
     *
6042
     * @return string
6043
     */
6044
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the caller's encoding so that the offset is measured in
        // the same units as the slice taken below. For the default 'UTF-8'
        // this is identical to omitting the argument.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            // Separator not present: there is nothing "after" it.
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Skip the separator itself and return the remainder.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6069
6070
    /**
6071
     * Gets the substring after the last occurrence of a separator.
6072
     *
6073
     * @param string $str       <p>The input string.</p>
6074
     * @param string $separator <p>The string separator.</p>
6075
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6076
     *
6077
     * @return string
6078
     */
6079
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the caller's encoding so that the offset is measured in
        // the same units as the slice taken below. For the default 'UTF-8'
        // this is identical to omitting the arguments.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            // Separator not present: there is nothing "after" it.
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Skip the separator itself and return the remainder.
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6104
6105
    /**
6106
     * Gets the substring before the first occurrence of a separator.
6107
     *
6108
     * @param string $str       <p>The input string.</p>
6109
     * @param string $separator <p>The string separator.</p>
6110
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6111
     *
6112
     * @return string
6113
     */
6114
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the caller's encoding so that the offset is measured in
        // the same units as the slice taken below. For the default 'UTF-8'
        // this is identical to omitting the argument.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            // Separator not present: there is nothing "before" it.
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6131
6132
    /**
6133
     * Gets the substring before the last occurrence of a separator.
6134
     *
6135
     * @param string $str       <p>The input string.</p>
6136
     * @param string $separator <p>The string separator.</p>
6137
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6138
     *
6139
     * @return string
6140
     */
6141
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // Locate the last case-insensitive occurrence of the separator.
        $offset = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            // Separator not present: there is nothing "before" it.
            return '';
        }

        // Everything in front of that occurrence is the result.
        $before = $isUtf8
            ? \mb_substr($str, 0, $offset)
            : self::substr($str, 0, $offset, $encoding);

        return (string) $before;
    }
6163
6164
    /**
6165
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
6166
     *
6167
     * @param string $str          <p>The input string.</p>
6168
     * @param string $needle       <p>The string to look for.</p>
6169
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6170
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
6171
     *
6172
     * @return string
6173
     */
6174
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        // stristr() reports "not found" as false; callers of this method
        // get an empty string instead.
        $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6200
6201
    /**
6202
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
6203
     *
6204
     * @param string $str          <p>The input string.</p>
6205
     * @param string $needle       <p>The string to look for.</p>
6206
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6207
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
6208
     *
6209
     * @return string
6210
     */
6211
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        // strrichr() reports "not found" as false; callers of this method
        // get an empty string instead.
        $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6232
6233
    /**
6234
     * Returns the last $n characters of the string.
6235
     *
6236
     * @param string $str      <p>The input string.</p>
6237
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6238
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6239
     *
6240
     * @return string
6241
     */
6242
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // Nothing to return for an empty input or a non-positive count.
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // A negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
6256
6257
    /**
6258
     * Limit the number of characters in a string.
6259
     *
6260
     * @param string $str      <p>The input string.</p>
6261
     * @param int    $length   [optional] <p>Default: 100</p>
6262
     * @param string $strAddOn [optional] <p>Default: …</p>
6263
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6264
     *
6265
     * @return string
6266
     */
6267
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: return the input untouched.
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on. The value is clamped at zero
            // because a negative length would make mb_substr() cut from the
            // END of the string and could return more text than the input.
            $keep = $length - (int) \mb_strlen($strAddOn);
            if ($keep < 0) {
                $keep = 0;
            }

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the input, and clamp
        // for the same reason as in the UTF-8 branch.
        $keep = $length - (int) self::strlen($strAddOn, $encoding);
        if ($keep < 0) {
            $keep = 0;
        }

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6294
6295
    /**
6296
     * Limit the number of characters in a string, but also after the next word.
6297
     *
6298
     * @param string $str      <p>The input string.</p>
6299
     * @param int    $length   [optional] <p>Default: 100</p>
6300
     * @param string $strAddOn [optional] <p>Default: …</p>
6301
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6302
     *
6303
     * @return string
6304
     */
6305
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // One slicing helper for both code paths: mbstring for UTF-8, the
        // class' own substr() for every other encoding.
        $slice = static function (string $input, int $start, int $size) use ($isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($input, $start, $size)
                : self::substr($input, $start, $size, $encoding);
        };

        // Short enough already: return the input untouched.
        $totalLength = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
        if ($totalLength <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right before it.
        if ($slice($str, $length - 1, 1) === ' ') {
            return ((string) $slice($str, 0, $length - 1)) . $strAddOn;
        }

        $truncated = $slice($str, 0, $length);
        if (!$isUtf8 && $truncated === false) {
            // The non-UTF-8 slice failed: only the add-on is returned.
            return '' . $strAddOn;
        }

        // Drop the last (possibly cut-off) word of the truncated text.
        $words = \explode(' ', $truncated);
        \array_pop($words);
        $withoutLastWord = \implode(' ', $words);

        if ($withoutLastWord === '') {
            // No complete word fits: fall back to a hard cut one character
            // short of the limit.
            return ((string) $slice($truncated, 0, $length - 1)) . $strAddOn;
        }

        return $withoutLastWord . $strAddOn;
    }
6359
6360
    /**
6361
     * Returns the longest common prefix between the string and $otherStr.
6362
     *
6363
     * @param string $str      <p>The input string.</p>
6364
     * @param string $otherStr <p>Second string for comparison.</p>
6365
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6366
     *
6367
     * @return string
6368
     */
6369
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Choose the length and character-access primitives once, up front.
        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str),
                \mb_strlen($otherStr)
            );
            $charAt = static function (string $input, int $position) {
                return \mb_substr($input, $position, 1);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );
            $charAt = static function (string $input, int $position) use ($encoding) {
                return self::substr($input, $position, 1, $encoding);
            };
        }

        // Walk both strings in lock-step and stop at the first difference
        // (or at the first character that cannot be extracted).
        $prefix = '';
        for ($position = 0; $position < $limit; ++$position) {
            $char = $charAt($str, $position);

            if ($char === false || $char !== $charAt($otherStr, $position)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
6418
6419
    /**
6420
     * Returns the longest common substring between the string and $otherStr.
6421
     * In the case of ties, it returns that which occurs first.
6422
     *
6423
     * @param string $str
6424
     * @param string $otherStr <p>Second string for comparison.</p>
6425
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6426
     *
6427
     * @return string the longest common substring of $str and $otherStr
6428
     */
6429
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Evaluated after normalization on purpose: an alias that normalizes
        // to 'UTF-8' takes the mbstring path from here on.
        $useMb = $encoding === 'UTF-8';

        // Extract every character exactly once, instead of once per table
        // cell inside the nested loops.
        $strChars = [];
        for ($i = 0; $i < $strLength; ++$i) {
            $strChars[] = $useMb
                ? \mb_substr($str, $i, 1)
                : self::substr($str, $i, 1, $encoding);
        }

        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[] = $useMb
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        // $len: length of the best match so far;
        // $end: 1-based position in $str just behind that match.
        $len = 0;
        $end = 0;

        // Each cell only depends on the row above it, so two rows are
        // enough; the full table is never read again.
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $currentRow = [0];
            $strChar = $strChars[$i - 1];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    // Extend the common run that ended at the previous
                    // character of both strings.
                    $run = $previousRow[$j - 1] + 1;

                    // Strictly greater: on ties the earliest match is kept.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($useMb) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6503
6504
    /**
     * Returns the longest run of characters that both strings end with.
     *
     * The strings are compared character by character, walking backwards from
     * their last characters, and the walk stops at the first mismatch.
     *
     * @param string $str      <p>First string for comparison.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared suffix, or an empty string if either input is empty
     *                or the last characters already differ.</p>
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($otherStr === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: call the "mb_*" functions directly
            $limit = (int) \min(
                \mb_strlen($str, $encoding),
                \mb_strlen($otherStr, $encoding)
            );

            $charFromEnd = static function (string $input, int $distance) {
                return \mb_substr($input, -$distance, 1);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $charFromEnd = static function (string $input, int $distance) use ($encoding) {
                return self::substr($input, -$distance, 1, $encoding);
            };
        }

        $suffix = '';
        $distance = 1;

        while ($distance <= $limit) {
            $candidate = $charFromEnd($str, $distance);

            // the second string is only inspected when the first one yielded a character
            if ($candidate === false) {
                break;
            }

            if ($candidate !== $charFromEnd($otherStr, $distance)) {
                break;
            }

            $suffix = $candidate . $suffix;
            ++$distance;
        }

        return $suffix;
    }
6565
6566
    /**
     * Checks whether $str matches the supplied regular expression.
     *
     * The pattern is placed verbatim between "/" delimiters and evaluated with
     * the "u" (UTF-8) modifier, so it must be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool
     *              <p><strong>true</strong> if the pattern matches, <strong>false</strong> if it
     *              does not match or the pattern could not be evaluated.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on failure
        return \preg_match($regex, $str) === 1;
    }
6578
6579
    /**
     * Tells whether a character exists at the given index.
     *
     * A non-negative offset is counted from the first character, a negative
     * offset from the last one (-1 addresses the last character). Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        return $offset < 0
            ? $charCount >= \abs($offset) // counting backwards: "-n" needs at least n characters
            : $charCount > $offset;       // counting forwards: index n needs n + 1 characters
    }
6601
6602
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must measure the string in the same encoding that is
        // used for the lookup below, otherwise both can disagree about which
        // indexes are valid.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6631
6632
    /**
     * Pad a string to the given length (counted in characters) with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty or when the input already has more than $pad_length characters.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of the three aliases
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // translate the textual aliases into the native "STR_PAD_*" constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // pick the length / prefix primitives once: "mb_*" directly for UTF-8,
        // the encoding-aware helpers of this class for everything else
        if ($encoding === 'UTF-8') {
            $countChars = static function (string $text): int {
                return (int) \mb_strlen($text);
            };
            $firstChars = static function (string $text, int $chars): string {
                return (string) \mb_substr($text, 0, $chars);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $countChars = static function (string $text) use ($encoding): int {
                return (int) self::strlen($text, $encoding);
            };
            $firstChars = static function (string $text, int $chars) use ($encoding): string {
                return (string) self::substr($text, 0, $chars, $encoding);
            };
        }

        // number of characters that have to be added
        $missing = $pad_length - $countChars($str);
        if ($missing < 0) {
            return $str;
        }

        if ($pad_type === \STR_PAD_LEFT) {
            $padChars = $countChars($pad_string);

            // repeat the pad string often enough, then cut it to the exact size
            return $firstChars(\str_repeat($pad_string, (int) \ceil($missing / $padChars)), $missing) . $str;
        }

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd remainder goes to the right-hand side
            $leftChars = (int) \floor($missing / 2);
            $rightChars = (int) \ceil($missing / 2);

            $before = $firstChars(\str_repeat($pad_string, $leftChars), $leftChars);
            $after = $firstChars(\str_repeat($pad_string, $rightChars), $rightChars);

            return $before . $str . $after;
        }

        // STR_PAD_RIGHT and every other integer value
        $padChars = $countChars($pad_string);

        return $str . $firstChars(\str_repeat($pad_string, (int) \ceil($missing / $padChars)), $missing);
    }
6794
6795
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Shortcut for str_pad() with a pad type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $centered = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $centered;
    }
6814
6815
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Shortcut for str_pad() with a pad type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $leftPadded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $leftPadded;
    }
6834
6835
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Shortcut for str_pad() with a pad type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $rightPadded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $rightPadded;
    }
6854
6855
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() first, the filtered result
     * is then handed to the native str_repeat().
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6879
6880
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     * All arguments are forwarded unchanged to the native function.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6925
6926
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * The comparison is done on raw bytes. If the string does not start with
     * $search it is returned unchanged. An empty $search never matches a
     * prefix; in that case $replacement is appended to the end of $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str, where the result is just $replacement
            return $str . $replacement;
        }

        $prefixBytes = \strlen($search);

        if (\strncmp($str, $search, $prefixBytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefixBytes);
    }
6957
6958
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * The comparison is done on raw bytes. If the string does not end with
     * $search (which includes every $search that is longer than $str) it is
     * returned unchanged. An empty $search appends $replacement to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str, where the result is just $replacement
            return $str . $replacement;
        }

        $suffixBytes = \strlen($search);

        // A needle longer than the haystack can never be a suffix. Checking the
        // length first keeps the negative offset below inside the string; an
        // out-of-range offset raises a warning (PHP 7) or a ValueError (PHP 8).
        if ($suffixBytes > \strlen($str)) {
            return $str;
        }

        if (\substr_compare($str, $search, -$suffixBytes) !== 0) {
            return $str;
        }

        return (string) \substr($str, 0, -$suffixBytes) . $replacement;
    }
6989
6990
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged when the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The text that takes the place of the first match.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        if ($firstPos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $firstPos, (int) self::strlen($search));
    }
7019
7020
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged when the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The text that takes the place of the last match.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $lastPos = self::strrpos($subject, $search);

        if ($lastPos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $lastPos, (int) self::strlen($search));
    }
7051
7052
    /**
     * Shuffles all the characters in the string.
     *
     * A list of character indexes is shuffled and the characters are then
     * collected in that order.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // the fast path is only taken for the literal 'UTF-8', checked before normalization
        $useMbDirectly = $encoding === 'UTF-8';

        if ($useMbDirectly) {
            $lastIndex = (int) \mb_strlen($str) - 1;
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $lastIndex = (int) self::strlen($str, $encoding) - 1;
        }

        $order = \range(0, $lastIndex);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($order);

        $result = '';

        foreach ($order as $position) {
            $char = $useMbDirectly
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            // skip positions for which no character could be extracted
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
7098
7099
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * A non-negative $end that is not greater than $start yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // the fast path is only taken for the literal 'UTF-8', checked before normalization
        $useMbDirectly = $encoding === 'UTF-8';

        if (!$useMbDirectly) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            // both cases need the total number of characters
            $total = $useMbDirectly
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $total : $total + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($useMbDirectly) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7148
7149
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" is replaced by "_"; every
     * uppercase letter is lowercased and prefixed with "_"; every ASCII digit
     * is wrapped in "_"; runs of whitespace become "_"; repeated "_" are
     * collapsed and leading / trailing "_" and whitespace are trimmed.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers and uppercase letters only: there is deliberately no "|"
            // between the two properties, inside a character class it would
            // match a literal pipe character instead of meaning "or"
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // ASCII digits survive the int round trip and get separated on both sides
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7213
7214
    /**
     * Sort all characters according to code points.
     *
     * The string is turned into its code points via self::codepoints(), the
     * list is sorted and converted back with self::string().
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicated values
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7239
7240
    /**
     * Convert a string to an array of Unicode characters.
     *
     * Arrays are processed element by element (recursively) and returned with their keys intact.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element. Values &lt;= 0 result in an empty array.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // write back by key instead of iterating by reference, so no dangling reference survives the loop
            foreach ($str as $k => $v) {
                $str[$k] = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: pick the characters one by one via "mb_substr"
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let PCRE collect every character in one pass
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand;
            // a lead byte without valid continuation bytes is skipped (nothing is added for it)

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // group the single characters into chunks and glue every chunk back into one string;
            // the callback takes its argument by value, because "array_map" does not pass references
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7379
7380
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * NOTE(review): with "mbstring" the pattern is handed to "mb_split" as a regular expression,
     * while the fallback branch quotes it via "preg_quote" and therefore splits on the literal
     * text - confirm which behavior is intended before relying on regex syntax.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings, empty if $limit is 0 or the split failed
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split" reports an error via false, which must not leak out of an "array" return type
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit parts, the rest is dropped
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        if ($limit > 0) {
            // ask for one extra part, so that the unsplit remainder can be dropped below
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7441
7442
    /**
     * Check if the string starts with the given substring (byte-wise, case-sensitive).
     *
     * An empty needle always matches; an empty haystack only matches an empty needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // compare only the leading bytes of the haystack against the needle
        return $haystack !== ''
               && \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7462
7463
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty input string or an empty list never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7491
7492
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty,
     * or if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // skip the separator itself
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper here as well, like the other "str_substr_*_separator" methods do
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7531
7532
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty,
     * or if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $position = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($isUtf8) {
            // everything behind the separator
            return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
        }

        $start = $position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
7571
7572
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty,
     * or if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $position = $isUtf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // everything in front of the separator
        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
7615
7616
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty,
     * or if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $position = \mb_strrpos($str, $separator);

            return $position === false
                ? ''
                : (string) \mb_substr($str, 0, $position);
        }

        $position = self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        // the encoding is normalized only after the lookup, right before cutting the string
        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $position, $normalizedEncoding);
    }
7658
7659
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * Without "$beforeNeedle" the result starts with the needle itself.
     * Returns an empty string if the input or the needle is empty, or if the needle was not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "mb_strstr" defaults its third argument to false, so the flag can be passed through as it is
        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
7703
7704
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * Without "$beforeNeedle" the result starts with the needle itself.
     * Returns an empty string if the input or the needle is empty, or if the needle was not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "mb_strrchr" defaults its third argument to false, so the flag can be passed through as it is
        $part = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
7748
7749
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7761
7762
    /**
     * Returns a string with the first letter of each word capitalized and the rest of the word lower-cased.
     * Every run of non-whitespace characters counts as a word. Also accepts an array, $ignore,
     * allowing you to list words that are left untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used if no language specific handling was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($useMbFunction !== true) {
                return self::ucfirst(
                    self::strtolower($word, $encoding, false, $lang, $tryToKeepStringLength),
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                $head = \mb_strtoupper(\mb_substr($word, 0, 1));
                $tail = \mb_strtolower(\mb_substr($word, 1));

                return $head . $tail;
            }

            $head = \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding);
            $tail = \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);

            return $head . $tail;
        };

        return (string) \preg_replace_callback('/([^\\s]+)/u', $titleizeWord, $str);
    }
7837
7838
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * NOTE(review): the entries of $ignore are merged into the list of "small words" and become part
     * of the regular expressions without any quoting, so regex meta characters in them are interpreted.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case character (e.g. all caps) is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Upper-case the first character of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstOfGroup1 = static function (array $matches) use ($encoding): string {
            return self::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keep capture group 1 and upper-case the first character of capture group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $keepGroup1UpperFirstOfGroup2 = static function (array $matches) use ($encoding): string {
            return $matches[1] . self::str_upper_first($matches[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];

                // groups which did not take part in the match are empty strings; compare against ''
                // explicitly, because a matched token like "0" is falsy and would otherwise be lost
                if ($matches[2] !== '') {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3] !== '') {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4] !== '') {
                    // capitalize word w/o internal caps
                    $str .= self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }

                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \\b # ...followed by small word
                     ~uxi',
            $keepGroup1UpperFirstOfGroup2,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $upperFirstOfGroup1,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $upperFirstOfGroup1,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $keepGroup1UpperFirstOfGroup2,
            $str
        );

        return $str;
    }
8007
8008
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big number and written with the digits "0" and "1",
     * without leading zeros (an empty string or a string of NUL bytes results in "0").
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Translate every hex digit into its four bits. Converting the whole hex string at once via
        // "base_convert" runs through a float for large numbers and silently loses precision.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // keep the former output format: no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8026
8027
    /**
     * Split a string into its lines.
     *
     * A run of one or two line-break characters ("\r" and / or "\n") counts as one separator.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for an empty input and for a failed split
        $nothingFound = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothingFound;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothingFound;
        }

        // filter the lines only if at least one of the filters was requested
        if ($removeShortValues !== null || $removeEmptyValues !== false) {
            return self::reduce_string_array(
                $lines,
                $removeEmptyValues,
                $removeShortValues
            );
        }

        return $lines;
    }
8065
8066
    /**
     * Convert a string into an array of words.
     *
     * The string is split with the words as captured delimiters, so without any filter the result
     * also contains the text between the words.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result for an empty input and for a failed split
        $nothingFound = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothingFound;
        }

        $charList = self::rxClass($charList, '\pL');

        $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothingFound;
        }

        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $parts;
        }

        $filtered = self::reduce_string_array(
            $parts,
            $removeEmptyValues,
            $removeShortValues
        );

        // make sure that every remaining value is a string; the keys stay as they are
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $filtered
        );
    }
8113
8114
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are passed through to "UTF8::to_ascii()" unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8129
8130
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring itself is longer than the desired length it cannot fit:
     * the string is then only truncated and nothing is appended.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $substringLength = (int) \mb_strlen($substring);

                // Only reserve room for the substring if it fits; otherwise the remaining length would
                // become negative, and the result would be cut from the end and exceed the desired length.
                if ($substringLength <= $length) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) \mb_substr($str, 0, $length - $substringLength) . $substring;
                }
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $substringLength = (int) self::strlen($substring, $encoding);

            // same rule as above: the substring is only appended if it fits into the desired length
            if ($substringLength <= $length) {
                return (
                       (string) self::substr(
                           $str,
                           0,
                           $length - $substringLength,
                           $encoding
                       )
                       ) . $substring;
            }
        }

        return (string) self::substr(
            $str,
            0,
            $length,
            $encoding
        );
    }
8187
8188
    /**
     * Truncates the string to a given length without splitting words. If
     * $substring is provided, and truncating occurs, the string is further
     * truncated so that the substring may be appended without exceeding the
     * desired length.
     *
     * An empty $str, a non-positive $length, or a $length that leaves no room
     * next to $substring all result in $substring being returned on its own.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>If true, a truncated text that contains no space
     *                                                is kept as it is instead of being emptied. Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // make room for the suffix
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            $truncated = \mb_substr($str, 0, $length);
            if ($truncated === false) {
                return '';
            }

            // a space exactly at the cut position means no word was split
            $strPosSpace = \mb_strpos($str, ' ', $length - 1);
            if ($strPosSpace === $length) {
                return $truncated . $substring;
            }

            // otherwise fall back to the last space inside the truncated part
            $lastPos = \mb_strrpos($truncated, ' ', 0);
            $cutAtLastSpace = $lastPos !== false
                              ||
                              ($strPosSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);

            if ($cutAtLastSpace) {
                $truncated = (string) \mb_substr($truncated, 0, (int) $lastPos);
            }

            return $truncated . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // make room for the suffix
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // a space exactly at the cut position means no word was split
        $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
        if ($strPosSpace === $length) {
            return $truncated . $substring;
        }

        // otherwise fall back to the last space inside the truncated part
        $lastPos = self::strrpos($truncated, ' ', 0, $encoding);
        $cutAtLastSpace = $lastPos !== false
                          ||
                          ($strPosSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);

        if ($cutAtLastSpace) {
            $truncated = (string) self::substr($truncated, 0, (int) $lastPos, $encoding);
        }

        return $truncated . $substring;
    }
8282
8283
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: implemented as UTF8::str_delimit() with "_" as the delimiter.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8297
8298
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: the string is first passed through UTF8::str_camelize() and the
     * result is then handed to UTF8::ucfirst().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8320
8321
    /**
     * Alias for "UTF8::ucfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Passed through to UTF8::ucfirst(). Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Passed through to UTF8::ucfirst(). Default: false</p>
     * @param string|null $lang                  [optional] <p>Passed through to UTF8::ucfirst(). Default: null</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Passed through to UTF8::ucfirst(). Default: false</p>
     *
     * @return string whatever UTF8::ucfirst() returns for the given arguments
     *
     * @see UTF8::ucfirst()
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $upperFirst = self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $upperFirst;
    }
8343
8344
    /**
     * Counts number of words in the UTF-8 string.
     *
     * The string is split via UTF8::str_to_words(); this method treats the
     * odd-indexed parts of that result as the words and the even-indexed parts
     * as the text between them.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // every format other than 1 and 2 only needs the number of words
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: key each word by its character offset in the input
        $offset = (int) self::strlen($parts[0]);
        $idx = 1;
        while ($idx < $partCount) {
            $words[$offset] = $parts[$idx];
            // advance past the word itself and the separator part that follows it
            $offset += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            $idx += 2;
        }

        return $words;
    }
8381
8382
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are run
     * through UTF8::strtocasefold() before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8403
8404
    /**
     * Alias for "UTF8::strstr()": all arguments are forwarded unchanged.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to UTF8::strstr(). Default: false</p>
     * @param string $encoding      [optional] <p>Passed through to UTF8::strstr(). Default: 'UTF-8'</p>
     * @param bool   $cleanUtf8     [optional] <p>Passed through to UTF8::strstr(). Default: false</p>
     *
     * @return false|string whatever UTF8::strstr() returns for the given arguments
     *
     * @see UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $found = self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );

        return $found;
    }
8426
8427
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized to NFD before the byte-wise comparison, so
     * canonically equivalent strings compare as equal. If a string can not be
     * normalized (Normalizer::normalize() reports a failure, e.g. for invalid
     * UTF-8), its raw bytes are compared instead of passing a non-string on to
     * \strcmp(), which would raise a TypeError under strict types.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical byte sequences need no normalization
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8449
8450
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $charList <p>The characters that end the initial segment. If empty, the length of the
     *                           whole string is returned.</p>
     * @param int|null $offset   [optional] <p>Start position of the part of $str that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of $str that is examined.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int the number of characters before the first character from $charList,
     *             or 0 if the examined part is empty or can not be determined
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask nothing can stop the segment
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        $hasWindow = $offset !== null || $length !== null;
        if ($hasWindow) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = self::substr($str, $start, $length, $encoding);
            } elseif ($length !== null) {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = \mb_substr($str, $start, $length);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = \mb_substr($str, $start);
            }

            if ($segment === false) {
                return 0;
            }

            $str = $segment;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            // no char from the mask at all: the whole string is the segment
            return (int) self::strlen($str, $encoding);
        }

        $prefixLength = self::strlen($found[1], $encoding);

        return $prefixLength === false ? 0 : $prefixLength;
    }
8513
8514
    /**
     * Alias for "UTF8::stristr()": all arguments are forwarded unchanged.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to UTF8::stristr(). Default: false</p>
     * @param string $encoding      [optional] <p>Passed through to UTF8::stristr(). Default: 'UTF-8'</p>
     * @param bool   $cleanUtf8     [optional] <p>Passed through to UTF8::stristr(). Default: false</p>
     *
     * @return false|string whatever UTF8::stristr() returns for the given arguments
     *
     * @see UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $found = self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );

        return $found;
    }
8536
8537
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every array entry is converted via UTF8::chr() and the results are
     * concatenated in array order.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        // keep the callable form: UTF8::chr() is invoked once per entry by array_map()
        $chars = \array_map([self::class, 'chr'], $array);

        return \implode('', $chars);
    }
8559
8560
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The keys of the static BOM table are the candidate BOM byte sequences;
     * the string is tested against each of them as a prefix.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys are needed. Iterate by value: a by-reference loop would
        // leave a dangling reference into the shared static table.
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, (string) $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8580
8581
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        // the native function is only given a second argument when one was supplied
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8617
8618
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input without any whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8635
8636
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * Strategy: mbstring if available, otherwise intl (UTF-8 and non-negative
     * offsets only), then the native function for pure ASCII input, and finally
     * a case-folded search via UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $canUseIntl = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                      &&
                      $offset >= 0 // grapheme_stripos() can't handle negative offset
                      &&
                      self::$SUPPORT['intl'] === true;

        if ($canUseIntl) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8711
8712
    /**
     * Returns all of haystack starting from and including the first case-insensitive
     * occurrence of needle to the end.
     *
     * Strategy: mbstring if available, otherwise intl (UTF-8 only), then the
     * native function for pure ASCII input, and finally a regex based search.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed everything.
        // Compare explicitly: a loose "!$needle" check would also treat the
        // valid needle "0" as empty and return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // lazily capture everything in front of the first (case-insensitive) match
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix, keep the needle and everything after it
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8795
8796
    /**
     * Get the string length, not the byte-length!
     *
     * Strategy: mbstring if available, then plain byte length for "CP850" /
     * "ASCII", then iconv, then intl (UTF-8 only), then byte length for pure
     * ASCII input, and finally a regex based character count.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        $canUseIntl = $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
                      &&
                      self::$SUPPORT['intl'] === true;

        if ($canUseIntl) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // one match per character; no match at all is reported as false
        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        return $charCount === 0 ? false : $charCount;
    }
8910
8911
    /**
     * Get string length in byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the number of bytes in $str
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strlen($str, 'CP850'); // 8-BIT
    }
8931
8932
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are run
     * through UTF8::strtocasefold() and then compared via UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8953
8954
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp(); both strings are run
     * through UTF8::strtonatfold() before the native comparison.
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical byte sequences need no folding
        if ($str1 === $str2) {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
8980
8981
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are run through UTF8::strtocasefold() and then compared via
     * UTF8::strncmp(). The $encoding is handed to both steps, so the length
     * limit is applied with the same charset that was used for case folding.
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            $encoding // previously dropped, so UTF8::strncmp() always fell back to its 'UTF-8' default
        );
    }
9008
9009
    /**
     * Compare the first n characters of two strings.
     *
     * Both strings are cut to their first $len characters and the results
     * are handed to UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // every charset except UTF-8 goes through the generic substr() wrapper
        if ($encoding !== 'UTF-8') {
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: call mbstring directly, without an explicit encoding argument
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
9044
9045
    /**
     * Search a string for any character out of a given set.
     *
     * The set is turned into a regular expression via UTF8::rxClass() and
     * matched (in "u" + "s" mode) against the haystack.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string the rest of the haystack, starting at the first matching character,
     *                      or false if either input is empty or nothing matches
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $found)) {
            return false;
        }

        // locate the matched character (byte position) and return everything from there
        $bytePos = (int) \strpos($haystack, $found[0]);

        return \substr($haystack, $bytePos);
    }
9067
9068
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native strpos() (for "CP850" / "ASCII"), intl (grapheme),
     * iconv, native strpos() (if haystack + needle are ASCII only) and finally
     * a pure PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found (or haystack / needle is empty) it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack: convert it
            // into an absolute character index first, so that the returned
            // position stays relative to the beginning of the *full* haystack.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position of the needle inside the already shortened haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9215
9216
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the inputs
     *                   is empty), it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_*" is available whenever function overloading is active, so it is
        // used with an 8-bit charset ('CP850') in that case
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9245
9246
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * Uses mbstring if available; otherwise native strrchr() (only for
     * "CP850" / "ASCII" and $before_needle === false), then a position lookup
     * via iconv or UTF8::strrpos(). The position-based fallbacks only look
     * for the first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isSingleByteCharset = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $isSingleByteCharset) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both remaining strategies search for the first character of the needle only
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9368
9369
    /**
     * Reverse the order of the characters of a string.
     *
     * The string is passed through UTF8::emoji_encode() before and through
     * UTF8::emoji_decode() after the reversal. For "UTF-8" the string is
     * walked grapheme by grapheme if intl is available, otherwise character
     * by character via mbstring; any other encoding uses UTF8::strlen() /
     * UTF8::substr().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $piece = self::substr($str, $idx, 1, $encoding);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \grapheme_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } else {
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \mb_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9421
9422
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * Uses mbstring if available; otherwise the position of the first
     * character of the needle is looked up via UTF8::strripos() and the
     * haystack is cut at that position.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9497
9498
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native strripos() (for "CP850" / "ASCII"), intl (grapheme),
     * native strripos() (if haystack + needle are ASCII only) and finally
     * UTF8::strrpos() on case-folded copies of both strings.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $isCodePoint = (int) $needle === $needle && $needle >= 0;
        if ($isCodePoint) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both sides and run the case-sensitive search on the result
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
9608
9609
    /**
     * Find the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or one of the
     *                   inputs is empty)
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_*" is available whenever function overloading is active, so it is
        // used with an 8-bit charset ('CP850') in that case
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9640
9641
    /**
     * Find position of last occurrence of a string in a string.
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native strrpos() (for "CP850" / "ASCII"), intl (grapheme),
     * native strrpos() (if haystack + needle are ASCII only) and finally a
     * pure PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters; the offset is added back to the result below
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // Same rule as the native strrpos(): with a negative offset the match
            // has to *begin* at or before character (length + offset), but the
            // needle itself may reach beyond that point. So only
            // (|offset| - needle length) characters are cut from the end.
            $cut = $offset + (int) self::strlen($needle, $encoding);
            if ($cut < 0) {
                $haystackTmp = self::substr($haystack, 0, $cut, $encoding);
            }
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the last match inside the (maybe shortened) haystack
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // convert the byte position into a character position of the given charset
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9778
9779
    /**
     * Find the byte position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the inputs
     *                   is empty), it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_*" is available whenever function overloading is active, so it is
        // used with an 8-bit charset ('CP850') in that case
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9809
9810
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * If an offset and / or a length is given, only that part of the string
     * is inspected. The mask is converted into a regular expression via
     * UTF8::rxClass() and anchored to the start of the string.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start position of the inspected part.</p>
     * @param int|null $length   [optional] <p>Length of the inspected part, null means "up to the end".</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int the length (in characters) of the matching initial segment,
     *                   0 if the (sliced) string or the mask is empty or nothing matches
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested slice first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        $matches = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            return 0;
        }

        return (int) self::strlen($matches[0], $encoding);
    }
9853
9854
    /**
     * Returns the part of the haystack that starts at (or, optionally, ends before)
     * the first occurrence of the needle.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native "strstr()" for CP850 / ASCII, intl ("grapheme_strstr()"),
     * native "strstr()" for pure ASCII input and finally a regular expression.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first
     *                              occurrence of the needle is returned (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if the needle is not found
     *                      or if the haystack or the needle is an empty string.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // backend: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // backend: native function for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // backend: intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($viaIntl !== false) {
                return $viaIntl;
            }
        }

        //
        // backend: native function for ascii-only input
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // backend: vanilla php (regular expression)
        //

        // capture everything in front of the first occurrence of the needle
        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        return $before_needle
            ? $match[1]
            : self::substr($haystack, (int) self::strlen($match[1]));
    }
9962
9963
    /**
     * Byte-wise lookup of the first occurrence of a string within another.
     *
     * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack, or <strong>false</strong> if needle is not found
     *                      or if the haystack or the needle is an empty string.</p>
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $funcOverloadActive = self::$SUPPORT['mbstring_func_overload'] === true;
        if (!$funcOverloadActive) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
9998
9999
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through the internal case-fix helper and is then
     * converted to lower- or uppercase.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before any case conversion takes place
            $str = self::clean($str);
        }

        $toLower = $lower === true;
        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
        }

        return $toLower
            ? self::strtolower($folded, $encoding, false, $lang)
            : self::strtoupper($folded, $encoding, false, $lang);
    }
10051
10052
    /**
     * Make a string lowercase.
     *
     * Without a language the conversion is done via "mb_strtolower()"; with a language
     * it is done via an intl transliterator ("<lang>-Lower", falling back to "Any-Lower").
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of known transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliteratorId = $lang . '-Lower';
        $idIsKnown = \in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true);
        if (!$idIsKnown) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
10121
10122
    /**
     * Make a string uppercase.
     *
     * Without a language the conversion is done via "mb_strtoupper()"; with a language
     * it is done via an intl transliterator ("<lang>-Upper", falling back to "Any-Upper").
     * A warning (E_USER_WARNING) is triggered if the language is not known to intl
     * or if intl is not available at all.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of known transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the language specific transliterator is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10191
10192
    /**
     * Translate characters or replace sub-strings.
     *
     * Behaviour by argument shape:
     * - $to is not an empty string: $from and $to are split via "UTF8::str_split()", cut to the
     *   same number of elements and used as a "from => to" replacement map for "strtr()".
     * - $to is an empty string and $from is a string: every occurrence of $from is removed.
     * - $to is an empty string and $from is an array: $from is used directly as the
     *   replacement map for "strtr()".
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the replacement map cannot be built from $from and $to
     *
     * @return string
     *                <p>A copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to.</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // cut the longer list, so that both lists have the same number of elements
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the result in its own variable, so that the error message below
            // can still show the data that could not be combined
            $replacePairs = \array_combine($fromChars, $toChars);
            if ($replacePairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            $from = $replacePairs;
        }

        // only reachable with a string if $to is an empty string: remove the sub-string
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10239
10240
    /**
     * Return the width of a string.
     *
     * With mbstring the result of "mb_strwidth()" is returned. Otherwise every character
     * from a fixed list of wide character ranges counts as 2 and every other character as 1.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        $encodingIsCanonical = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$encodingIsCanonical) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // backend: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // backend: vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all wide characters and remember how many of them were removed
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // wide characters count twice, the remaining characters once
        return $wideCount * 2 + (int) self::strlen($narrowOnly, 'UTF-8');
    }
10290
10291
    /**
     * Get part of a string.
     *
     * The work is delegated to the first usable backend, in this order:
     * mbstring, native "substr()" for CP850 / ASCII, intl ("grapheme_substr()"),
     * iconv ("iconv_substr()"), native "substr()" for pure ASCII input and finally
     * a split-into-characters fallback.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str       <p>The string being checked.</p>
     * @param int    $offset    <p>The first position used in str.</p>
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      input, for a length of 0 and (in the fallback code) for an offset
     *                      behind the end of the string. <b>FALSE</b> is returned if the
     *                      string length cannot be determined or if the used backend fails.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // use "mb_substr()" with the explicit encoding here: calling "self::substr()"
            // again with the same arguments would recurse endlessly
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string (a length of 0 was already handled above, so only "null" is left)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        // no explicit length: the string length is used as upper bound
        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10451
10452
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * If an offset or a length is given, $str1 is cut to that window first and $str2 is
     * cut to the (character) length of the resulting $str1 before both are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If it is not set, $str1 is used
     *                                     from the offset up to its end.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure $str1 with the same encoding that was used to cut it,
                // otherwise $str2 could be cut to a wrong length
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10505
10506
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The substring to search for.</p>
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length after the specified offset to search for the
     *                          substring. If it is not set, the length of the haystack is used.
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, 0 if the length is 0, or <strong>false</strong>
     *                   if the haystack or the needle is an empty string or if the length
     *                   of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an empty window cannot contain anything
        if ($length === 0) {
            return 0;
        }

        $encodingIsCanonical = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$encodingIsCanonical) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $isUtf8 = $encoding === 'UTF-8';

        // cut the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }

                $length = (int) $haystackLength;
            }

            $haystack = (string) (
                $isUtf8
                    ? \mb_substr($haystack, $offset, $length)
                    : \mb_substr($haystack, $offset, $length, $encoding)
            );
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // without mbstring: count the matches of the quoted needle
        $pattern = '/' . \preg_quote($needle, '/') . '/us';
        \preg_match_all($pattern, $haystack, $occurrences, \PREG_SET_ORDER);

        return \count($occurrences);
    }
10587
10588
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * @param string $haystack <p>The string being checked.</p>
     * @param string $needle   <p>The string being found.</p>
     * @param int    $offset   [optional] <p>The byte offset where to start counting.</p>
     * @param int    $length   [optional] <p>
     *                         The maximum number of bytes after the offset to search in.
     *                         If omitted, the search runs to the end of the haystack.
     *                         </p>
     *
     * @return false|int
     *                   The number of times the needle occurs in the haystack; <strong>0</strong> if the
     *                   haystack or the needle is empty. In the "mbstring.func_overload" code path
     *                   <strong>false</strong> is returned if the haystack length cannot be determined or
     *                   (before PHP 7.1) if offset + length is not positive.
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // Without "mbstring.func_overload" the native function already works on bytes.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // From here on the "mb_" functions are in use: cut the haystack down first.
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }

                $length = (int) $haystackLength;
            }

            $isEmptyWindow = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
            if (
                $isEmptyWindow
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
10665
10666
    /**
     * Count how often $substring occurs in $str.
     *
     * The comparison is case-sensitive by default; pass $caseSensitive = false
     * to compare case-insensitively.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int <p>The number of occurrences; 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // Decided before normalisation: only the literal "UTF-8" takes the short path.
        $isPlainUtf8 = $encoding === 'UTF-8';

        if (!$isPlainUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!$caseSensitive) {
            if ($isPlainUtf8) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            } else {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }
        }

        if ($isPlainUtf8) {
            return (int) \mb_substr_count($str, $substring);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10712
10713
    /**
     * Strip the prefix $needle from the start of $haystack, ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string <p>The haystack without the prefix, or the unchanged haystack if the prefix does not match.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // Nothing to strip from an empty haystack, nothing to strip with an empty needle.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10737
10738
    /**
     * Get a part of a string, where offset and length are counted in bytes.
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first byte position used in str.</p>
     * @param int    $length [optional] <p>The maximum length (in bytes) of the returned string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and <i>length</i>
     *                      parameters; an empty string if <i>str</i> is empty or <i>length</i> is 0.
     *                      The result of the underlying "substr()" / "mb_substr()" call is returned as-is,
     *                      so <b>FALSE</b> is possible where those functions return it.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        $nothingRequested = $str === '' || $length === 0;
        if ($nothingRequested) {
            return '';
        }

        // no offset and no limit: the whole string
        if ($length === null && $offset === 0) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            $byteLimit = $length === null ? 2147483647 : $length;

            return \substr($str, $offset, $byteLimit);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        return \mb_substr($str, $offset, $length, 'CP850');
    }
10769
10770
    /**
     * Strip the suffix $needle from the end of $haystack, ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string <p>The haystack without the suffix, or the unchanged haystack if the suffix does not match.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // Nothing to strip from an empty haystack, nothing to strip with an empty needle.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10794
10795
    /**
     * Strip the prefix $needle from the start of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string <p>The haystack without the prefix, or the unchanged haystack if the prefix does not match.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // Nothing to strip from an empty haystack, nothing to strip with an empty needle.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10819
10820
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException <p>If $offset or $length is an array while $str is not.</p>
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: non-integer entries fall back to $num
            if ($length === null) {
                // NOTE(review): a missing length is turned into 0 (= insert) for array input, while the
                // scalar path below treats null as "up to the end of the string" — confirm this is intended.
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call per element; $encoding must be forwarded explicitly,
            // otherwise every element would be processed with the default "UTF-8"
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only the first element is used
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end; a missing or oversized length means "to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php: split both strings into characters and splice the arrays
        //

        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10970
10971
    /**
     * Strip the suffix $needle from the end of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The haystack without the suffix, or the unchanged haystack if the suffix does not match.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to strip from an empty haystack, nothing to strip with an empty needle.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // The suffix test is done on raw bytes; only the cut below is character-based.
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$endsWithNeedle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
11012
11013
    /**
     * Returns a case swapped version of the string.
     *
     * Lower-case characters become upper-case and all other cased characters become lower-case;
     * characters without case are kept as they are.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before the case functions see them
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            // NOTE(review): the byte-wise XOR is only reliable while lower- and upper-casing keep the
            // byte length of every character; this path is left unchanged for non UTF-8 encodings.
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);
        $byteLength = \strlen($str);

        // Fast path: "lower ^ upper ^ original" swaps the case as long as all three strings line up
        // byte by byte (XOR on strings silently truncates to the shortest operand).
        if (\strlen($lower) === $byteLength && \strlen($upper) === $byteLength) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // The case mapping changed the byte length (e.g. "ǰ" is upper-cased to "J" + combining caron),
        // so the XOR trick would return truncated / invalid UTF-8: swap character by character instead.
        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // not valid UTF-8, so the string cannot be split: keep the legacy behaviour
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $charLower = \mb_strtolower($char);
            $swapped .= $char === $charLower ? \mb_strtoupper($char) : $charLower;
        }

        return $swapped;
    }
11040
11041
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when the "mbstring" or "iconv" extension is not loaded
     * but one of its functions ("mb_strlen" / "iconv") exists anyway.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
11064
11065
    /**
     * Replace every tab character in the string with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces used per tab. Default: 4</p>
     *
     * @return string <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        // the common widths are kept as ready-made strings, everything else is built on demand
        $commonWidths = [
            4 => '    ',
            2 => '  ',
        ];

        $indent = $commonWidths[$tabLength] ?? \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
11083
11084
    /**
     * Title-case a string: the first character of each word becomes upper-case,
     * all other characters become lower-case.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before the case functions see them
            $str = self::clean($str);
        }

        // language specific rules or length preservation are delegated to "str_titleize()"
        $needsSpecialHandling = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsSpecialHandling) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
11121
11122
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::to_ascii()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr <p>Passed to "UTF8::to_ascii()" as its second argument.</p>
     * @param bool   $strict    <p>Passed to "UTF8::to_ascii()" as its third argument.</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11138
11139
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11153
11154
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::to_latin1()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_latin1()
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11168
11169
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::to_utf8()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11183
11184
    /**
     * Convert a string into ASCII.
     *
     * Pure ASCII input is returned unchanged. Otherwise the string is cleaned, optionally
     * transliterated via PHP-Intl and finally every remaining non-ASCII character is looked up
     * in the per-"bank" replacement tables (code point >> 8 selects the table, code point & 255
     * the entry); characters without an entry are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // replacement tables, loaded lazily per bank and cached for the rest of the request
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.|[^\x00]$/us', $str, $ar);
        $chars = $ar[0];
        /** @noinspection ForeachSourceInspection */
        foreach ($chars as &$c) {
            // reset per character, so that a code point decoded for an earlier character
            // can never be reused when the current lead byte matches none of the ranges below
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // decode the code point from the lead byte and its continuation bytes
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // lead bytes 254 / 255 and everything that could not be decoded are "unknown"
            if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
                $c = $unknown;

                continue;
            }

            // load the replacement table for this bank on first use; a missing table becomes []
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            // keep for debugging (missing chars)
            /*
            echo "file: " . sprintf('x%02x', $bank) . "\n";
            echo "char: " . $c . "\n";
            echo "ord: " . $ord . "\n";
            echo "newchar: " . $newchar . "\n";
            echo "bank:" . $bank . "\n\n";
             */

            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        // break the reference left behind by the by-reference foreach
        unset($c);

        return \implode('', $chars);
    }
11345
11346
    /**
     * Convert a value into a boolean, based on its string representation.
     *
     * - an empty string is false
     * - "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false (case-insensitive)
     * - other numeric strings are true if the number is greater than 0
     * - everything else is true unless the trimmed string is falsy
     *
     * @param mixed $str <p>The value to check; it is cast to string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // exact match first, lower-cased match second
        $known = $keywords[$value] ?? $keywords[\strtolower($value)] ?? null;
        if ($known !== null) {
            return $known;
        }

        return \is_numeric($value)
            ? (((float) $value + 0) > 0)
            : (bool) \trim($value);
    }
11387
11388
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Only "a-z", "A-Z", "0-9", ".", "-", whitespace and the fallback character survive;
     * whitespace is then converted into the fallback character, runs of the fallback character
     * are collapsed and the fallback character is trimmed from both ends.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char     <p>Replacement for whitespace. An empty string removes the whitespace.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\\.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
            '/[\\s]+/u',                                             // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback char would produce the invalid pattern "/[]+/u": "preg_replace()" would
        // fail for the whole pattern list and the result would be an empty string.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/u';    // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        if ($fallback_char === '') {
            // nothing to trim
            return $string;
        }

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11423
11424
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept); every other
     * value is cast to string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
11448
11449
    /**
     * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged and the result is
     * returned as-is.
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     */
    public static function to_latin1($str)
    {
        $latin1 = self::to_iso8859($str);

        return $latin1;
    }
11462
11463
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request,
     * html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $length) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                // a lone continuation byte (10xxxxxx) needs conversion, everything else is kept
                $result .= ($lead & "\xC0") === "\x80"
                    ? self::to_utf8_convert_helper($lead)
                    : $lead;
                ++$pos;

                continue;
            }

            // number of continuation bytes a lead byte of this range announces
            if ($lead <= "\xDF") {
                $trailing = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $trailing = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $trailing = 3; // looks like 4 bytes UTF8
            } else {
                $trailing = 0; // doesn't look like UTF8, but should be converted
            }

            // the sequence only counts as UTF8, if every announced byte exists and is 10xxxxxx
            $isSequence = $trailing > 0 && ($pos + $trailing) < $length;
            for ($offset = 1; $isSequence && $offset <= $trailing; ++$offset) {
                $next = $str[$pos + $offset];
                $isSequence = $next >= "\x80" && $next <= "\xBF";
            }

            if ($isSequence) {
                // yeah, almost sure it's UTF8 already
                $result .= \substr($str, $pos, $trailing + 1);
                $pos += $trailing + 1;
            } else {
                // not valid UTF8 - convert only the lead byte
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $codePoint = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $codePoint = ((int) \hexdec($matches[1]) << 10)
                                 + (int) \hexdec($matches[2])
                                 + 0x10000
                                 - (0xD800 << 10)
                                 - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($codePoint < 0x80) {
                    return (string) self::chr($codePoint);
                }

                if ($codePoint < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($codePoint >> 6)) . (string) self::chr(0x80 | ($codePoint & 0x3F));
                }

                return self::decimal_to_chr($codePoint);
            },
            $result
        );

        // preg_replace_callback() returns null on error
        if ($result === null) {
            return '';
        }

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
11596
11597
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check: the char-list "0" is falsy in PHP
        // and would otherwise silently fall back to whitespace trimming.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11630
11631
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest; only the first character is
     * upper-cased. The plain "mb_strtoupper()" is used unless a language or
     * $tryToKeepStringLength is given, in which case "UTF8::strtoupper()" handles it.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // invalid byte sequences are removed before the string is split
            $str = self::clean($str);
        }

        $plainUppercase = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            $head = $plainUppercase === true
                ? \mb_strtoupper($head)
                : self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength);

            return $head . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plainUppercase === true) {
            $head = \mb_strtoupper((string) \mb_substr($str, 0, 1, $encoding), $encoding);
        } else {
            $head = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $head . $tail;
    }
11700
11701
    /**
     * Alias of "UTF8::ucfirst()": forwards the string, the encoding and the clean-flag and
     * returns the result unchanged.
     *
     * @param string $str
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $capitalized = self::ucfirst($str, $encoding, $cleanUtf8);

        return $capitalized;
    }
11716
11717
    /**
     * Uppercase for all words in the string.
     *
     * Pure ASCII input without a char-list and without exceptions is handled by PHP's native
     * "ucwords()"; otherwise the string is split via "UTF8::str_to_words()" and every word that
     * is not listed in $exceptions is passed to "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // strict comparison: the string "0" is falsy, but it is valid input and must be kept
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // invalid byte sequences are removed before the string is split into words
            $str = self::clean($str);
        }

        // compared against '' instead of being cast to bool, so that a char-list or an
        // exception of "0" is not mistaken for "nothing given"
        $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as $index => $word) {
            if ($word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::ucfirst($word, $encoding);
            }
        }

        return \implode('', $words);
    }
11778
11779
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to url-/entity-decode
        $hasEncodedPart = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $hasEncodedPart = true;

                break;
            }
        }

        if ($hasEncodedPart === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // decode again and again, until a pass does not change the string anymore
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $withoutEntities = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
11836
11837
    /**
11838
     * Return an array with "urlencoded"-win1252 -> UTF-8
11839
     *
11840
     * @return string[]
11841
     *
11842
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
11843
     */
11844
    public static function urldecode_fix_win1252_chars(): array
11845
    {
11846
        return [
11847 2
            '%20' => ' ',
11848
            '%21' => '!',
11849
            '%22' => '"',
11850
            '%23' => '#',
11851
            '%24' => '$',
11852
            '%25' => '%',
11853
            '%26' => '&',
11854
            '%27' => "'",
11855
            '%28' => '(',
11856
            '%29' => ')',
11857
            '%2A' => '*',
11858
            '%2B' => '+',
11859
            '%2C' => ',',
11860
            '%2D' => '-',
11861
            '%2E' => '.',
11862
            '%2F' => '/',
11863
            '%30' => '0',
11864
            '%31' => '1',
11865
            '%32' => '2',
11866
            '%33' => '3',
11867
            '%34' => '4',
11868
            '%35' => '5',
11869
            '%36' => '6',
11870
            '%37' => '7',
11871
            '%38' => '8',
11872
            '%39' => '9',
11873
            '%3A' => ':',
11874
            '%3B' => ';',
11875
            '%3C' => '<',
11876
            '%3D' => '=',
11877
            '%3E' => '>',
11878
            '%3F' => '?',
11879
            '%40' => '@',
11880
            '%41' => 'A',
11881
            '%42' => 'B',
11882
            '%43' => 'C',
11883
            '%44' => 'D',
11884
            '%45' => 'E',
11885
            '%46' => 'F',
11886
            '%47' => 'G',
11887
            '%48' => 'H',
11888
            '%49' => 'I',
11889
            '%4A' => 'J',
11890
            '%4B' => 'K',
11891
            '%4C' => 'L',
11892
            '%4D' => 'M',
11893
            '%4E' => 'N',
11894
            '%4F' => 'O',
11895
            '%50' => 'P',
11896
            '%51' => 'Q',
11897
            '%52' => 'R',
11898
            '%53' => 'S',
11899
            '%54' => 'T',
11900
            '%55' => 'U',
11901
            '%56' => 'V',
11902
            '%57' => 'W',
11903
            '%58' => 'X',
11904
            '%59' => 'Y',
11905
            '%5A' => 'Z',
11906
            '%5B' => '[',
11907
            '%5C' => '\\',
11908
            '%5D' => ']',
11909
            '%5E' => '^',
11910
            '%5F' => '_',
11911
            '%60' => '`',
11912
            '%61' => 'a',
11913
            '%62' => 'b',
11914
            '%63' => 'c',
11915
            '%64' => 'd',
11916
            '%65' => 'e',
11917
            '%66' => 'f',
11918
            '%67' => 'g',
11919
            '%68' => 'h',
11920
            '%69' => 'i',
11921
            '%6A' => 'j',
11922
            '%6B' => 'k',
11923
            '%6C' => 'l',
11924
            '%6D' => 'm',
11925
            '%6E' => 'n',
11926
            '%6F' => 'o',
11927
            '%70' => 'p',
11928
            '%71' => 'q',
11929
            '%72' => 'r',
11930
            '%73' => 's',
11931
            '%74' => 't',
11932
            '%75' => 'u',
11933
            '%76' => 'v',
11934
            '%77' => 'w',
11935
            '%78' => 'x',
11936
            '%79' => 'y',
11937
            '%7A' => 'z',
11938
            '%7B' => '{',
11939
            '%7C' => '|',
11940
            '%7D' => '}',
11941
            '%7E' => '~',
11942
            '%7F' => '',
11943
            '%80' => '`',
11944
            '%81' => '',
11945
            '%82' => '‚',
11946
            '%83' => 'ƒ',
11947
            '%84' => '„',
11948
            '%85' => '…',
11949
            '%86' => '†',
11950
            '%87' => '‡',
11951
            '%88' => 'ˆ',
11952
            '%89' => '‰',
11953
            '%8A' => 'Š',
11954
            '%8B' => '‹',
11955
            '%8C' => 'Œ',
11956
            '%8D' => '',
11957
            '%8E' => 'Ž',
11958
            '%8F' => '',
11959
            '%90' => '',
11960
            '%91' => '‘',
11961
            '%92' => '’',
11962
            '%93' => '“',
11963
            '%94' => '”',
11964
            '%95' => '•',
11965
            '%96' => '–',
11966
            '%97' => '—',
11967
            '%98' => '˜',
11968
            '%99' => '™',
11969
            '%9A' => 'š',
11970
            '%9B' => '›',
11971
            '%9C' => 'œ',
11972
            '%9D' => '',
11973
            '%9E' => 'ž',
11974
            '%9F' => 'Ÿ',
11975
            '%A0' => '',
11976
            '%A1' => '¡',
11977
            '%A2' => '¢',
11978
            '%A3' => '£',
11979
            '%A4' => '¤',
11980
            '%A5' => '¥',
11981
            '%A6' => '¦',
11982
            '%A7' => '§',
11983
            '%A8' => '¨',
11984
            '%A9' => '©',
11985
            '%AA' => 'ª',
11986
            '%AB' => '«',
11987
            '%AC' => '¬',
11988
            '%AD' => '',
11989
            '%AE' => '®',
11990
            '%AF' => '¯',
11991
            '%B0' => '°',
11992
            '%B1' => '±',
11993
            '%B2' => '²',
11994
            '%B3' => '³',
11995
            '%B4' => '´',
11996
            '%B5' => 'µ',
11997
            '%B6' => '¶',
11998
            '%B7' => '·',
11999
            '%B8' => '¸',
12000
            '%B9' => '¹',
12001
            '%BA' => 'º',
12002
            '%BB' => '»',
12003
            '%BC' => '¼',
12004
            '%BD' => '½',
12005
            '%BE' => '¾',
12006
            '%BF' => '¿',
12007
            '%C0' => 'À',
12008
            '%C1' => 'Á',
12009
            '%C2' => 'Â',
12010
            '%C3' => 'Ã',
12011
            '%C4' => 'Ä',
12012
            '%C5' => 'Å',
12013
            '%C6' => 'Æ',
12014
            '%C7' => 'Ç',
12015
            '%C8' => 'È',
12016
            '%C9' => 'É',
12017
            '%CA' => 'Ê',
12018
            '%CB' => 'Ë',
12019
            '%CC' => 'Ì',
12020
            '%CD' => 'Í',
12021
            '%CE' => 'Î',
12022
            '%CF' => 'Ï',
12023
            '%D0' => 'Ð',
12024
            '%D1' => 'Ñ',
12025
            '%D2' => 'Ò',
12026
            '%D3' => 'Ó',
12027
            '%D4' => 'Ô',
12028
            '%D5' => 'Õ',
12029
            '%D6' => 'Ö',
12030
            '%D7' => '×',
12031
            '%D8' => 'Ø',
12032
            '%D9' => 'Ù',
12033
            '%DA' => 'Ú',
12034
            '%DB' => 'Û',
12035
            '%DC' => 'Ü',
12036
            '%DD' => 'Ý',
12037
            '%DE' => 'Þ',
12038
            '%DF' => 'ß',
12039
            '%E0' => 'à',
12040
            '%E1' => 'á',
12041
            '%E2' => 'â',
12042
            '%E3' => 'ã',
12043
            '%E4' => 'ä',
12044
            '%E5' => 'å',
12045
            '%E6' => 'æ',
12046
            '%E7' => 'ç',
12047
            '%E8' => 'è',
12048
            '%E9' => 'é',
12049
            '%EA' => 'ê',
12050
            '%EB' => 'ë',
12051
            '%EC' => 'ì',
12052
            '%ED' => 'í',
12053
            '%EE' => 'î',
12054
            '%EF' => 'ï',
12055
            '%F0' => 'ð',
12056
            '%F1' => 'ñ',
12057
            '%F2' => 'ò',
12058
            '%F3' => 'ó',
12059
            '%F4' => 'ô',
12060
            '%F5' => 'õ',
12061
            '%F6' => 'ö',
12062
            '%F7' => '÷',
12063
            '%F8' => 'ø',
12064
            '%F9' => 'ù',
12065
            '%FA' => 'ú',
12066
            '%FB' => 'û',
12067
            '%FC' => 'ü',
12068
            '%FD' => 'ý',
12069
            '%FE' => 'þ',
12070
            '%FF' => 'ÿ',
12071
        ];
12072
    }
12073
12074
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place, byte by byte: 2-byte sequences are mapped to a single
     * byte (or "?" if the code point is >= 256), 3- and 4-byte sequences become "?", and every
     * other byte is copied unchanged.
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>If true, the original string is returned whenever the decoded
     *                              string is not shorter (measured with "UTF8::strlen()") than the
     *                              input.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparision
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables
        // NOTE(review): presumably byte => ordinal ("ord") and ordinal => byte ("chr") maps - confirm in getData()
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i is the read position, $j the write position (never ahead of $i)
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence at the very end of the input:
                        // do not read past the string, emit the placeholder instead
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // not representable in ISO-8859-1
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12143
12144
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around PHP's "utf8_encode()" that always returns a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12168
12169
    /**
     * Fix utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12182
12183
    /**
     * Returns the class-level table of utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12196
12197
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. If the string contains more than $limit
     * words, it is cut after the last allowed word, right-trimmed and $strAddOn is appended;
     * otherwise the string is returned unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, followed by up to $limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // no match at all => nothing to cut
        if (!isset($matches[0])) {
            return $str;
        }

        // the match covers the complete string => the limit was not exceeded
        if (\mb_strlen($str) === (int) \mb_strlen($matches[0])) {
            return $str;
        }

        return \rtrim($matches[0]) . $strAddOn;
    }
12224
12225
    /**
     * Wraps a string to a given number of characters
     *
     * The multi-byte string is translated into a single-byte "mask" ("#" for an existing break,
     * " " for a space, "?" for every other character), the mask is wrapped with PHP's native
     * "wordwrap()" and the resulting break positions are then applied to the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $lines = \explode($break, $str);
        if ($lines === false) {
            return '';
        }

        // $units holds the real characters (and breaks), $mask one ASCII byte per unit
        $units = [];
        $mask = '';
        foreach ($lines as $lineIndex => $line) {
            if ($lineIndex) {
                // an already existing break between two lines
                $units[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($line) as $char) {
                $units[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $wrapped = '';
        $unitPos = 0;
        $hashPos = -1;
        $maskPos = -1;
        $mask = \wordwrap($mask, $width, '#', $cut);

        $maskLength = \mb_strlen($mask);
        while (($hashPos = \mb_strpos($mask, '#', $hashPos + 1)) !== false) {
            // copy everything up to the next break position
            for (++$maskPos; $maskPos < $hashPos; ++$maskPos) {
                $wrapped .= $units[$unitPos];
                unset($units[$unitPos++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($maskPos > $maskLength) {
                    break 2;
                }
            }

            // the break replaces an existing break or a space; a break that was inserted into
            // a cut word does not consume a character
            if (
                $units[$unitPos] === $break
                ||
                $units[$unitPos] === ' '
            ) {
                unset($units[$unitPos++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($hashPos > $maskLength) {
                break;
            }
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $units);
    }
12311
12312
    /**
     * Word-wrap each line of a string separately.
     *
     * The string is first split into lines (by newline, or by "$delimiter" if given),
     * every line is wrapped via self::wordwrap() and the lines are joined again with
     * "$delimiter" (or "\n" if no delimiter was given).
     *
     * @param string      $str           <p>The input string.</p>
     * @param int         $width         [optional] <p>The column width.</p>
     * @param string      $break         [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut           [optional] <p>
     *                                   If the cut is set to true, the string is
     *                                   always wrapped at or before the specified width. So if you have
     *                                   a word that is larger than the given width, it is broken apart.
     *                                   </p>
     * @param bool        $addFinalBreak [optional] <p>
     *                                   If this flag is true, then the method will add a $break at the end
     *                                   of the result string.
     *                                   </p>
     * @param string|null $delimiter     [optional] <p>
     *                                   You can change the default behavior, where we split the string by newline.
     *                                   </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $addFinalBreak = true,
        string $delimiter = null
    ): string {
        // Default: split on any newline style; otherwise split on the caller's delimiter.
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrappedLines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrappedLines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";

        return \implode($glue, $wrappedLines) . ($addFinalBreak ? $break : '');
    }
12363
12364
    /**
     * Get the list of Unicode White Space characters known to this class.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12373
12374
    /**
     * Apply the case-folding replacement tables to a string.
     *
     * The "common" table (self::$COMMON_CASE_FOLD) is always applied; the "full" table
     * (loaded once from the "caseFolding_full" data file) is applied in addition when requested.
     *
     * @param string $str
     * @param bool   $useLower     <p>Convert to uppercase by default, otherwise (true) convert to lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = $useLower === true;

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        if ($toLower) {
            $search = $commonUpper;
            $replace = $commonLower;
        } else {
            $search = $commonLower;
            $replace = $commonUpper;
        }
        $str = \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // Loaded lazily and kept for the lifetime of the request.
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Index 0 is replaced by index 1 when lowercasing, and the other way round otherwise.
        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12415
12416
    /**
     * Load a data set by including "/data/<file>.php" (relative to this file) and returning its result.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12430
12431
    /**
     * Load a data set from "/data/<file>.php" (relative to this file), but only if that file exists.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed the value returned by the included file, or false if the file does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12449
12450
    /**
     * Fill the emoji lookup caches on first use.
     *
     * @return true|null true if the caches were built by this call, null if they already existed
     */
    private static function initEmojiData()
    {
        // Already initialized: nothing to do.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // Order the table by key length (in bytes), longest keys first.
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // One placeholder token per key, derived from the key's CRC32 checksum.
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12480
12481
    /**
     * Checks whether the mbstring function overloading of the string functions is active on the server.
     *
     * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        // Without the constant there is nothing to compare against.
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($overloadFlags & \MB_OVERLOAD_STRING) !== 0;
    }
12498
12499
    /**
     * Filter a list of strings: drop entries that are too short and / or blank.
     *
     * @param array    $strings           <p>The strings to filter.</p>
     * @param bool     $removeEmptyValues <p>If true, entries that are empty after \trim() are removed.</p>
     * @param int|null $removeShortValues <p>
     *                                    If not null, entries whose length (\mb_strlen) is less than
     *                                    or equal to this value are removed.
     *                                    </p>
     *
     * @return array <p>The remaining entries, re-indexed from 0 (input keys are not preserved).</p>
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        // init
        $return = [];

        // Iterate by value: the entries are only read, never modified.
        foreach ($strings as $str) {
            if (
                $removeShortValues !== null
                &&
                \mb_strlen($str) <= $removeShortValues
            ) {
                continue;
            }

            if (
                $removeEmptyValues === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12533
12534
    /**
     * Build (and cache) a regular-expression fragment from a set of characters.
     *
     * The characters of "$s" are appended to "$class" and wrapped into a bracket expression "[...]":
     * - "-" is moved to the front of the bracket expression,
     * - characters of up to two bytes are escaped via \preg_quote() (delimiter "/"),
     * - longer single characters are appended as they are,
     * - anything that is not a single character becomes a separate alternative, in which
     *   case the result has the form "(?:[...]|alternative|...)".
     *
     * The result is cached per "$s . $class" for the lifetime of the request.
     *
     * @param string $s     <p>The characters to add.</p>
     * @param string $class [optional] <p>Initial content of the bracket expression.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        $classArray = [$class];

        // Iterate by value into a separate variable: a function result cannot be
        // iterated by reference, and the "$s" parameter must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $classArray[0] .= $char;
            } else {
                $classArray[] = $char;
            }
        }

        // NOTE: loose check kept on purpose for backward compatibility
        // (an empty class and the class "0" both stay without brackets).
        if ($classArray[0]) {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12582
12583
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split by "$delimiter"; every part gets its first character upper-cased via
     * self::ucfirst(), except for:
     * - parts that are exactly one of the known name particles (e.g. "van", "von", "de"),
     * - parts that start with one of the known prefixes (e.g. "mc", "d'", "al-"),
     * - and, only if the delimiter is "-", parts that start with one of the name particles.
     * The parts are joined with "$delimiter" again afterwards.
     *
     * NOTE(review): "$encoding" is only used for the prefix lookup and is not forwarded to
     * self::ucfirst() - confirm whether that is intentional.
     *
     * @param string $names
     * @param string $delimiter
     * @param string $encoding
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        if ($namesArray === false) {
            return '';
        }

        $specialNames = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        $specialPrefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        // Beginnings that protect a part from being capitalized.
        if ($delimiter === '-') {
            $protectedBeginnings = \array_merge($specialNames, $specialPrefixes);
        } else {
            $protectedBeginnings = $specialPrefixes;
        }

        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialNames, true)) {
                continue;
            }

            $isProtected = false;
            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $isProtected = true;

                    // one match is enough
                    break;
                }
            }

            if ($isProtected) {
                continue;
            }

            $namesArray[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $namesArray);
    }
12673
12674
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and all non-spacing marks (\p{Mn}) are removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     *                     <p>
     *                     The transformed string, an empty string if the input could not be normalized
     *                     or null if the regular expression failed.
     *                     </p>
     */
    private static function strtonatfold(string $str)
    {
        $normalized = \Normalizer::normalize($str, \Normalizer::NFD);

        // The normalizer returns false on error. Passing that on to \preg_replace() is a
        // TypeError under strict types in PHP 8, while older versions treated it as an empty
        // string, so we keep that result explicitly.
        if ($normalized === false) {
            return '';
        }

        return \preg_replace(
            '/\p{Mn}+/u',
            '',
            $normalized
        );
    }
12689
12690
    /**
     * Convert a single input byte into its UTF-8 representation.
     *
     * The lookup tables ("ord", "chr" and "win1252_to_utf8") are loaded from the data files on first use.
     * If the ordinal of the input has an entry in the Windows-1252 table, that entry is returned,
     * otherwise a two-byte sequence is built from the ordinal.
     *
     * @param int|string $input <p>Used as key into the "ord" table (presumably a single byte - verify against callers).</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Cast explicitly: a float with a fractional part as array offset is a
            // deprecated implicit conversion since PHP 8.1 (the result is the same key).
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // Byte-wise string operations (not logical ones): keep the low 6 bits and set the high bit.
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12724
12725
    /**
     * Convert non-standard "%uXXXX" url-escapes (3 or 4 hex digits) into
     * hexadecimal HTML entities ("&#xXXXX;").
     *
     * @param string $str
     *
     * @return string <p>The converted string; the unchanged input if nothing matched or the regex failed.</p>
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        // A single pass is enough: \preg_replace() returns the subject unchanged if nothing
        // matches and null on error, in which case we keep the input instead of dropping it.
        return \preg_replace('/%u([0-9a-fA-F]{3,4})/', '&#x\\1;', $str) ?? $str;
    }
12739
}
12740